You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2019/01/08 18:35:15 UTC

[01/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 612a1d029 -> e8c65da6b
  refs/heads/branch_8x 5a60c3e0d -> 0e903cab4
  refs/heads/master 7db4121b4 -> 283b19a8d


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
new file mode 100644
index 0000000..446253d
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
@@ -0,0 +1,150 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+# The version is interpolated into the download URL, the Java class name and
+# the output filename, so it must be exactly X.Y with nothing before or after.
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\A\d+\.\d+\z/) {
+    print STDERR "Usage: $script_name -v <version>\n";
+    print STDERR "\tversion must be of the form X.Y, e.g. 11.0\n"
+        if ($version);
+    exit 1;
+}
+my $url = "http://www.unicode.org/Public/emoji/${version}/emoji-test.txt";
+(my $underscore_version = $version) =~ tr/./_/; # e.g. 11.0 -> 11_0
+my $class_name = "EmojiTokenizationTestUnicode_${underscore_version}";
+my $output_filename = "${class_name}.java";
+my $header =<<"__HEADER__";
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by ${script_name}
+ * from: ${url}
+ *
+ * emoji-test.txt contains emoji char sequences, which are represented as
+ * tokenization tests in this class.
+ * 
+ */
+\@Ignore
+public class ${class_name} extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+    for (int i = 0 ; i < tests.length ; i += 2) {
+      String test = tests[i + 1];
+      try {
+        assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
+      } catch (Throwable t) {
+        throw new Exception("Failed to tokenize \\"" + tests[i] + "\\":", t);        
+      }
+    }
+  }
+
+  private String[] tests = new String[] {
+__HEADER__
+
+my @tests = split /\r?\n/, get_URL_content($url);
+# Write the generated class into the directory that holds this script.
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+open(OUT, '>', $output_path)
+    or die "Error opening '$output_path' for writing: $!";
+# OUT stays a bareword handle because the rest of the script prints to it.
+print STDERR "Writing '$output_path'...";
+
+print OUT $header;
+
+my $isFirst = 1;
+for my $line (@tests) {
+    next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
+    # Each data line yields two array entries: the raw line, then its escaped chars.
+    print OUT ",\n\n" unless $isFirst;
+    $isFirst = 0;
+
+    # Example line: 1F46E 1F3FB 200D 2642 FE0F                 ; fully-qualified     # 👮🏻‍♂️ man police officer: light skin tone
+    $line =~ s/\s+$//;     # Trim trailing whitespace
+    $line =~ s/\t/  /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
+    print OUT "    \"$line\",\n";
+    my ($test_string) = $line =~ /^(.*?)\s*;/; # Hex code points before the first ';'
+    $test_string =~ s/([0-9A-F]+)/\\u$1/g;     # Prefix every hex run with \u
+    $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge; # Replace 5+ digit escapes with surrogate pairs
+    $test_string =~ s/\s//g;
+    print OUT "    \"${test_string}\"";
+}
+print OUT "  };\n}\n";
+close(OUT) or die "Error closing '$output_path': $!"; # Buffered write errors surface at close
+print STDERR "done.\n";
+
+
+# sub above_BMP_char_to_surrogates
+#
+# Takes one supplementary code point written as hex digits (e.g. "1F46E")
+# and returns its UTF-16 high and low surrogates as two four-digit
+# uppercase hex strings (e.g. "D83D", "DC6E").
+# Precondition: the value is above 0xFFFF; callers only pass 5+ hex digits.
+#
+sub above_BMP_char_to_surrogates {
+    my ($hex_digits) = @_;
+    my $code_point = hex($hex_digits);
+    my @surrogates = (0xD800 + (($code_point - 0x10000) >> 10), 0xDC00 + ($code_point & 0x3FF));
+    return map { sprintf("%04X", $_) } @surrogates;
+}
+
+
+# sub get_URL_content
+#
+# Issues an HTTP GET for the given URL and returns the response content.
+# Progress goes to STDERR; a failed download prints the status line and
+# terminates the script with exit status 1.
+#
+sub get_URL_content {
+    my ($target_url) = @_;
+    print STDERR "Retrieving '$target_url'...";
+    my $reply = LWP::UserAgent->new->request(HTTP::Request->new(GET => $target_url));
+    if ($reply->is_success) {
+        print STDERR "done.\n";
+        return $reply->content;
+    }
+    print STDERR "Failed to download '$target_url':\n\t",$reply->status_line,"\n";
+    exit 1;
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
index 3004035..dd16cb6 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
@@ -40,8 +40,6 @@ $underscore_version =~ s/\./_/g;
 my $class_name = "WordBreakTestUnicode_${underscore_version}";
 my $output_filename = "${class_name}.java";
 my $header =<<"__HEADER__";
-package org.apache.lucene.analysis;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -59,6 +57,8 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+package org.apache.lucene.analysis.standard;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.junit.Ignore;
@@ -81,7 +81,7 @@ import org.junit.Ignore;
  *    \\p{WordBreak = Hebrew_Letter}
  *    \\p{WordBreak = Katakana}
  *    \\p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
- *    [\\uFF10-\\uFF19]                (Full-width Arabic digits)
+ *    [\\uFF10-\\uFF19]                 (Full-width Arabic digits)
  */
 \@Ignore
 public class ${class_name} extends BaseTokenStreamTestCase {
@@ -91,6 +91,7 @@ __HEADER__
 
 my $codepoints = [];
 map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
+my $regional_indicator_codepoints = [];
 # Complex_Context is an alias for 'SA', which is used in LineBreak.txt
 # Using lowercase versions of property value names to allow for case-
 # insensitive comparison with the names in the Unicode data files.
@@ -98,7 +99,9 @@ parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
 parse_Unicode_data_file($scripts_url, $codepoints, 
                         {'han' => 1, 'hiragana' => 1});
 parse_Unicode_data_file($word_break_url, $codepoints,
-                        {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
+                        {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1, 'e_base' => 1,
+                         'e_modifier' => 1, 'glue_after_zwj' => 1, 'e_base_gaz' => 1});
+parse_Unicode_data_file($word_break_url, $regional_indicator_codepoints, {'regional_indicator' => 1});
 my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
 
 my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
@@ -124,10 +127,21 @@ for my $line (@tests) {
   $test_string =~ s/\\u000D/\\r/g;
   $test_string =~ s/\\u0022/\\\"/g;
   $sequence =~ s/^\s*รท\s*//; # Trim leading break character
+  
+  # TODO: When upgrading JFlex to a version that supports Unicode 11.0+: remove the special case below for a Unicode 9.0 test data line that conflicts with TR#51 11.0 test data
+  # ÷ 200D ÷ 261D ÷  #  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+  if ($sequence =~ /^200D\s*รท\s*261D$/) {
+    print OUT "    // Skipping this test because it conflicts with TR#51 v11.0 rules.\n\n";
+    next;
+  }
+  
   my @tokens = ();
+  my $isfirst = 0;
   for my $candidate (split /\s*รท\s*/, $sequence) {
+    $isfirst = 1;
     my @chars = ();
-    my $has_wanted_char = 0;
+    my $has_wanted_chars = 0;
+    my $prev_char_regional_indicator = 0;
     while ($candidate =~ /([0-9A-F]+)/gi) {
       my $hexchar = $1;
       if (4 == length($hexchar)) {
@@ -135,12 +149,21 @@ for my $line (@tests) {
       } else {
         push @chars, above_BMP_char_to_surrogates($hexchar);
       }
-      unless ($has_wanted_char) {
-        $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
+      unless ($has_wanted_chars) {
+        my $codepoint = hex($hexchar);
+        if (defined($codepoints->[$codepoint])) {
+          $has_wanted_chars = 1;
+        } elsif (defined($regional_indicator_codepoints->[$codepoint])) {
+          if (1 == $prev_char_regional_indicator) {
+            $has_wanted_chars = 1; # must be 2 regional indicators in a row
+          } else {
+            $prev_char_regional_indicator = 1;
+          }
+        }
       }
     }
-    if ($has_wanted_char) {
-      push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
+    if ($has_wanted_chars) {
+      push @tokens, '"'.join('', map { $_ eq "0022" ? "\\\"" : "\\u$_" } @chars).'"';
     }
   }
   print OUT "    assertAnalyzesTo(analyzer, \"${test_string}\",\n";


[19/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
deleted file mode 100644
index 4a3731e..0000000
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
+++ /dev/null
@@ -1,5537 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
-
-/**
- * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
- * from: http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
- *
- * WordBreakTest.txt indicates the points in the provided character sequences
- * at which conforming implementations must and must not break words.  This
- * class tests for expected token extraction from each of the test sequences
- * in WordBreakTest.txt, where the expected tokens are those character
- * sequences bounded by word breaks and containing at least one character
- * from one of the following character sets:
- *
- *    \p{Script = Han}                (From http://www.unicode.org/Public/6.3.0/ucd/Scripts.txt)
- *    \p{Script = Hiragana}
- *    \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.3.0/ucd/LineBreak.txt)
- *    \p{WordBreak = ALetter}         (From http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt)
- *    \p{WordBreak = Hebrew_Letter}
- *    \p{WordBreak = Katakana}
- *    \p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
- *    [\uFF10-\uFF19]                (Full-width Arabic digits)
- */
-@Ignore
-public class WordBreakTestUnicode_6_3_0 extends BaseTokenStreamTestCase {
-
-  public void test(Analyzer analyzer) throws Exception {
-    // รท 0001 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0001",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
-                     new String[] {  });
-
-    // รท 0001 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\r",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\r",
-                     new String[] {  });
-
-    // รท 0001 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\n",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\n",
-                     new String[] {  });
-
-    // รท 0001 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u000B",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
-                     new String[] {  });
-
-    // รท 0001 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 0001 ร— 0308 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 0001 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 0001 ร— 0308 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 0001 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u003A",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
-                     new String[] {  });
-
-    // รท 0001 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u002C",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
-                     new String[] {  });
-
-    // รท 0001 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u002E",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
-                     new String[] {  });
-
-    // รท 0001 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 0001 ร— 0308 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 0001 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u005F",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
-                     new String[] {  });
-
-    // รท 0001 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 0001 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 0001 ร— 0308 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 0001 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\"",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\"",
-                     new String[] {  });
-
-    // รท 0001 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0027",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
-                     new String[] {  });
-
-    // รท 0001 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u00AD",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 0001 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0300",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
-                     new String[] {  });
-
-    // รท 0001 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 0001 ร— 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 0001 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0001",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0001",
-                     new String[] {  });
-
-    // รท 000D รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\r",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\r",
-                     new String[] {  });
-
-    // รท 000D ร— 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) ร— [3.0] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\n",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\n",
-                     new String[] {  });
-
-    // รท 000D รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u000B",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u000B",
-                     new String[] {  });
-
-    // รท 000D รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000D รท 0308 รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000D รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000D รท 0308 รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000D รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u003A",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u003A",
-                     new String[] {  });
-
-    // รท 000D รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u002C",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u002C",
-                     new String[] {  });
-
-    // รท 000D รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u002E",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u002E",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0030 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000D ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000D ÷ 005F ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u005F",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u005F",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 1F1E6 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000D ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000D ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\"",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\"",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0027",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0027",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u00AD",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 × 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0300",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 × 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0300",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000D ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0001 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0001",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 0001 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0001",
-                     new String[] {  });
-
-    // รท 000A รท 000D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\r",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 000D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\r",
-                     new String[] {  });
-
-    // รท 000A รท 000A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\n",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 000A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\n",
-                     new String[] {  });
-
-    // รท 000A รท 000B รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u000B",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 000B รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u000B",
-                     new String[] {  });
-
-    // รท 000A รท 3031 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000A รท 0308 รท 3031 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000A รท 0041 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000A รท 0308 รท 0041 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000A รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u003A",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u003A",
-                     new String[] {  });
-
-    // รท 000A รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u002C",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u002C",
-                     new String[] {  });
-
-    // รท 000A รท 002E รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u002E",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 002E รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u002E",
-                     new String[] {  });
-
-    // รท 000A รท 0030 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 000A รท 0308 รท 0030 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 000A รท 005F รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u005F",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 005F รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u005F",
-                     new String[] {  });
-
-    // รท 000A รท 1F1E6 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 1F1E6 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 000A รท 05D0 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 000A รท 0308 รท 05D0 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 000A รท 0022 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\"",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 0022 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\"",
-                     new String[] {  });
-
-    // รท 000A รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0027",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0027",
-                     new String[] {  });
-
-    // รท 000A รท 00AD รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u00AD",
-                     new String[] {  });
-
-    // รท 000A รท 0308 ร— 00AD รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 000A รท 0300 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0300",
-                     new String[] {  });
-
-    // รท 000A รท 0308 ร— 0300 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0300",
-                     new String[] {  });
-
-    // รท 000A รท 0061 ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 000A รท 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 000A รท 0061 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0308 รท 0061 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0061 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0308 รท 0061 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0061 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0308 รท 0061 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 000A รท 0031 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0308 รท 0031 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0031 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0308 รท 0031 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0031 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0308 รท 0031 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000B รท 0001 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0001",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 0001 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
-                     new String[] {  });
-
-    // รท 000B รท 000D รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\r",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 000D รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\r",
-                     new String[] {  });
-
-    // รท 000B รท 000A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\n",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 000A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\n",
-                     new String[] {  });
-
-    // รท 000B รท 000B รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u000B",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 000B รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
-                     new String[] {  });
-
-    // รท 000B รท 3031 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000B รท 0308 รท 3031 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000B รท 0041 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000B รท 0308 รท 0041 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000B รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u003A",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
-                     new String[] {  });
-
-    // รท 000B รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u002C",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
-                     new String[] {  });
-
-    // รท 000B รท 002E รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u002E",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 002E รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
-                     new String[] {  });
-
-    // รท 000B รท 0030 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 000B รท 0308 รท 0030 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 000B รท 005F รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u005F",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 005F รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
-                     new String[] {  });
-
-    // รท 000B รท 1F1E6 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 1F1E6 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 000B รท 05D0 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 000B รท 0308 รท 05D0 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 000B รท 0022 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\"",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 0022 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\"",
-                     new String[] {  });
-
-    // รท 000B รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0027",
-                     new String[] {  });
-
-    // รท 000B รท 0308 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
-                     new String[] {  });
-
-    // รท 000B รท 00AD รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u00AD",
-                     new String[] {  });
-
-    // รท 000B รท 0308 ร— 00AD รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 000B รท 0300 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0300",
-                     new String[] {  });
-
-    // รท 000B รท 0308 ร— 0300 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
-                     new String[] {  });
-
-    // รท 000B รท 0061 ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 000B รท 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 000B รท 0061 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 000B รท 0308 รท 0061 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 000B รท 0061 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 000B รท 0308 รท 0061 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 000B รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 3031 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0001",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\r",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\r",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\n",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\n",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u000B",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u3031",
-                     new String[] { "\u3031\u3031" });
-
-    // ÷ 3031 × 0308 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
-                     new String[] { "\u3031\u0308\u3031" });
-
-    // ÷ 3031 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0041",
-                     new String[] { "\u3031", "\u0041" });
-
-    // ÷ 3031 × 0308 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
-                     new String[] { "\u3031\u0308", "\u0041" });
-
-    // ÷ 3031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u003A",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u002C",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u002E",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0030",
-                     new String[] { "\u3031", "\u0030" });
-
-    // ÷ 3031 × 0308 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
-                     new String[] { "\u3031\u0308", "\u0030" });
-
-    // ÷ 3031 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u005F",
-                     new String[] { "\u3031\u005F" });
-
-    // ÷ 3031 × 0308 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
-                     new String[] { "\u3031\u0308\u005F" });
-
-    // ÷ 3031 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u05D0",
-                     new String[] { "\u3031", "\u05D0" });
-
-    // ÷ 3031 × 0308 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
-                     new String[] { "\u3031\u0308", "\u05D0" });
-
-    // ÷ 3031 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\"",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\"",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0027",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u00AD",
-                     new String[] { "\u3031\u00AD" });
-
-    // ÷ 3031 × 0308 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
-                     new String[] { "\u3031\u0308\u00AD" });
-
-    // ÷ 3031 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0300",
-                     new String[] { "\u3031\u0300" });
-
-    // ÷ 3031 × 0308 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
-                     new String[] { "\u3031\u0308\u0300" });
-
-    // ÷ 3031 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
-                     new String[] { "\u3031", "\u0061\u2060" });
-
-    // ÷ 3031 × 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
-                     new String[] { "\u3031\u0308", "\u0061\u2060" });
-
-    // ÷ 3031 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 3031 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 3031 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 0041 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0001",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\r",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\r",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\n",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\n",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u000B",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u3031",
-                     new String[] { "\u0041", "\u3031" });
-
-    // ÷ 0041 × 0308 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
-                     new String[] { "\u0041\u0308", "\u3031" });
-
-    // ÷ 0041 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0041",
-                     new String[] { "\u0041\u0041" });
-
-    // ÷ 0041 × 0308 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
-                     new String[] { "\u0041\u0308\u0041" });
-
-    // ÷ 0041 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u003A",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u002C",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u002E",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0030",
-                     new String[] { "\u0041\u0030" });
-
-    // ÷ 0041 × 0308 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
-                     new String[] { "\u0041\u0308\u0030" });
-
-    // ÷ 0041 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u005F",
-                     new String[] { "\u0041\u005F" });
-
-    // ÷ 0041 × 0308 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
-                     new String[] { "\u0041\u0308\u005F" });
-
-    // ÷ 0041 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u05D0",
-                     new String[] { "\u0041\u05D0" });
-
-    // ÷ 0041 × 0308 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
-                     new String[] { "\u0041\u0308\u05D0" });
-
-    // ÷ 0041 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\"",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\"",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0027",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 × 00AD ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u00AD",
-                     new String[] { "\u0041\u00AD" });
-
-    // ÷ 0041 × 0308 × 00AD ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u00AD",
-                     new String[] { "\u0041\u0308\u00AD" });
-
-    // ÷ 0041 × 0300 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0300",
-                     new String[] { "\u0041\u0300" });
-
-    // ÷ 0041 × 0308 × 0300 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0300",
-                     new String[] { "\u0041\u0308\u0300" });
-
-    // ÷ 0041 × 0061 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u2060",
-                     new String[] { "\u0041\u0061\u2060" });
-
-    // ÷ 0041 × 0308 × 0061 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u2060",
-                     new String[] { "\u0041\u0308\u0061\u2060" });
-
-    // ÷ 0041 × 0061 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u003A",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u003A",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0061 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u0027",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u0027\u2060",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0061 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u002C",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u002C",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0031 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u003A",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u003A",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // ÷ 0041 × 0031 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u0027",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u0027",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // ÷ 0041 × 0031 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u002C",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002C",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // ÷ 0041 × 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u002E\u2060",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // รท 003A รท 0001 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0001",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 0001 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0001",
-                     new String[] {  });
-
-    // รท 003A รท 000D รท  #  รท [0.2] COLON (MidLetter) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\r",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 000D รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\r",
-                     new String[] {  });
-
-    // รท 003A รท 000A รท  #  รท [0.2] COLON (MidLetter) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\n",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 000A รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\n",
-                     new String[] {  });
-
-    // รท 003A รท 000B รท  #  รท [0.2] COLON (MidLetter) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u000B",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 000B รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u000B",
-                     new String[] {  });
-
-    // รท 003A รท 3031 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 003A ร— 0308 รท 3031 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 003A รท 0041 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 003A ร— 0308 รท 0041 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 003A รท 003A รท  #  รท [0.2] COLON (MidLetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u003A",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 003A รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u003A",
-                     new String[] {  });
-
-    // รท 003A รท 002C รท  #  รท [0.2] COLON (MidLetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u002C",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 002C รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u002C",
-                     new String[] {  });
-
-    // รท 003A รท 002E รท  #  รท [0.2] COLON (MidLetter) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u002E",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 002E รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u002E",
-                     new String[] {  });
-
-    // รท 003A รท 0030 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 003A ร— 0308 รท 0030 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 003A รท 005F รท  #  รท [0.2] COLON (MidLetter) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u005F",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 005F รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u005F",
-                     new String[] {  });
-
-    // รท 003A รท 1F1E6 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 1F1E6 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 003A รท 05D0 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 003A ร— 0308 รท 05D0 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 003A รท 0022 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\"",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 0022 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\"",
-                     new String[] {  });
-
-    // รท 003A รท 0027 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0027",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 0027 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0027",
-                     new String[] {  });
-
-    // รท 003A ร— 00AD รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u00AD",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 ร— 00AD รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 003A ร— 0300 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0300",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 ร— 0300 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0300",
-                     new Str

<TRUNCATED>

[10/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
new file mode 100644
index 0000000..027a1b5
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
@@ -0,0 +1,8276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
+ * from: http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakTest.txt
+ *
+ * WordBreakTest.txt indicates the points in the provided character sequences
+ * at which conforming implementations must and must not break words.  This
+ * class tests for expected token extraction from each of the test sequences
+ * in WordBreakTest.txt, where the expected tokens are those character
+ * sequences bounded by word breaks and containing at least one character
+ * from one of the following character sets:
+ *
+ *    \p{Script = Han}                (From http://www.unicode.org/Public/9.0.0/ucd/Scripts.txt)
+ *    \p{Script = Hiragana}
+ *    \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/9.0.0/ucd/LineBreak.txt)
+ *    \p{WordBreak = ALetter}         (From http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakProperty.txt)
+ *    \p{WordBreak = Hebrew_Letter}
+ *    \p{WordBreak = Katakana}
+ *    \p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
+ *    [\uFF10-\uFF19]                 (Full-width Arabic digits)
+ */
+@Ignore
+public class WordBreakTestUnicode_9_0_0 extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+    // ÷ 0001 ÷ 0001 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0001",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 ÷ 0001 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
+                     new String[] {  });
+
+    // ÷ 0001 ÷ 000D ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\r",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 ÷ 000D ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\r",
+                     new String[] {  });
+
+    // ÷ 0001 ÷ 000A ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\n",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 ÷ 000A ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\n",
+                     new String[] {  });
+
+    // ÷ 0001 ÷ 000B ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u000B",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 ÷ 000B ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
+                     new String[] {  });
+
+    // ÷ 0001 ÷ 3031 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 0001 × 0308 ÷ 3031 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 0001 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 0001 ร— 0308 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 0001 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u003A",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
+                     new String[] {  });
+
+    // รท 0001 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u002C",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
+                     new String[] {  });
+
+    // รท 0001 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u002E",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
+                     new String[] {  });
+
+    // รท 0001 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 0001 ร— 0308 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 0001 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u005F",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
+                     new String[] {  });
+
+    // รท 0001 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 0001 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 0001 ร— 0308 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 0001 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\"",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\"",
+                     new String[] {  });
+
+    // รท 0001 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0027",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
+                     new String[] {  });
+
+    // รท 0001 รท 261D รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 0001 ร— 0308 รท 261D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 0001 รท 1F3FB รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 0001 ร— 0308 รท 1F3FB รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 0001 รท 2764 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 0001 ร— 0308 รท 2764 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 0001 รท 1F466 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 0001 ร— 0308 รท 1F466 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 0001 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u00AD",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
+                     new String[] {  });
+
+    // รท 0001 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0300",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
+                     new String[] {  });
+
+    // รท 0001 ร— 200D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u200D",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 ร— 200D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u200D",
+                     new String[] {  });
+
+    // รท 0001 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // รท 0001 ร— 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // รท 0001 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 0001 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 0001 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 0001 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0001",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0001",
+                     new String[] {  });
+
+    // รท 000D รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\r",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\r",
+                     new String[] {  });
+
+    // รท 000D ร— 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) ร— [3.0] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\n",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\n",
+                     new String[] {  });
+
+    // รท 000D รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u000B",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u000B",
+                     new String[] {  });
+
+    // รท 000D รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000D รท 0308 รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000D รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000D รท 0308 รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000D รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u003A",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u003A",
+                     new String[] {  });
+
+    // รท 000D รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u002C",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u002C",
+                     new String[] {  });
+
+    // รท 000D รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u002E",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u002E",
+                     new String[] {  });
+
+    // รท 000D รท 0030 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000D รท 0308 รท 0030 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000D รท 005F รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u005F",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 005F รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u005F",
+                     new String[] {  });
+
+    // รท 000D รท 1F1E6 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000D ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000D ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\"",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\"",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0027",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0027",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 261D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000D ÷ 0308 ÷ 261D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000D ÷ 1F3FB ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000D ÷ 0308 ÷ 1F3FB ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000D ÷ 2764 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000D ÷ 0308 ÷ 2764 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000D ÷ 1F466 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000D ÷ 0308 ÷ 1F466 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000D ÷ 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u00AD",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 × 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0300",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 × 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 200D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u200D",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 × 200D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000D ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0001 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0001",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0001",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 000D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\r",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\r",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 000A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\n",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\n",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 000B ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u000B",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u000B",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 3031 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000A ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000A ÷ 0041 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000A ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000A ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u003A",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u003A",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u002C",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u002C",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u002E",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u002E",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000A ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000A ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u005F",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u005F",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000A ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000A ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\"",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\"",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0027",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0027",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 261D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000A ÷ 0308 ÷ 261D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000A ÷ 1F3FB ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000A ÷ 0308 ÷ 1F3FB ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000A ÷ 2764 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000A ÷ 0308 ÷ 2764 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000A ÷ 1F466 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000A ÷ 0308 ÷ 1F466 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000A ÷ 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u00AD",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0300",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 200D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u200D",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 × 200D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000A ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0001",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\r",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\r",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\n",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\n",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u000B",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000B ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000B ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000B ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000B ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u003A",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u002C",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u002E",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000B ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000B ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u005F",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000B ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000B ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\"",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\"",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0027",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 261D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000B ÷ 0308 ÷ 261D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000B ÷ 1F3FB ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000B ÷ 0308 ÷ 1F3FB ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000B ÷ 2764 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000B ÷ 0308 ÷ 2764 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000B ÷ 1F466 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000B ÷ 0308 ÷ 1F466 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000B ÷ 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u00AD",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0300",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 200D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u200D",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 × 200D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000B ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 3031 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0001",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\r",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\r",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\n",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\n",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u000B",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u3031",
+                     new String[] { "\u3031\u3031" });
+
+    // ÷ 3031 × 0308 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
+                     new String[] { "\u3031\u0308\u3031" });
+
+    // ÷ 3031 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0041",
+                     new String[] { "\u3031", "\u0041" });
+
+    // ÷ 3031 × 0308 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
+                     new String[] { "\u3031\u0308", "\u0041" });
+
+    // ÷ 3031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u003A",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u002C",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u002E",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0030",
+                     new String[] { "\u3031", "\u0030" });
+
+    // ÷ 3031 × 0308 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
+                     new String[] { "\u3031\u0308", "\u0030" });
+
+    // ÷ 3031 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u005F",
+                     new String[] { "\u3031\u005F" });
+
+    // ÷ 3031 × 0308 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
+                     new String[] { "\u3031\u0308\u005F" });
+
+    // ÷ 3031 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u05D0",
+                     new String[] { "\u3031", "\u05D0" });
+
+    // ÷ 3031 × 0308 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
+                     new String[] { "\u3031\u0308", "\u05D0" });
+
+    // ÷ 3031 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\"",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\"",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0027",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 261D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u261D",
+                     new String[] { "\u3031", "\u261D" });
+
+    // ÷ 3031 × 0308 ÷ 261D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u261D",
+                     new String[] { "\u3031\u0308", "\u261D" });
+
+    // ÷ 3031 ÷ 1F3FB ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDFFB",
+                     new String[] { "\u3031", "\uD83C\uDFFB" });
+
+    // ÷ 3031 × 0308 ÷ 1F3FB ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDFFB",
+                     new String[] { "\u3031\u0308", "\uD83C\uDFFB" });
+
+    // ÷ 3031 ÷ 2764 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u2764",
+                     new String[] { "\u3031", "\u2764" });
+
+    // ÷ 3031 × 0308 ÷ 2764 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u2764",
+                     new String[] { "\u3031\u0308", "\u2764" });
+
+    // ÷ 3031 ÷ 1F466 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83D\uDC66",
+                     new String[] { "\u3031", "\uD83D\uDC66" });
+
+    // ÷ 3031 × 0308 ÷ 1F466 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83D\uDC66",
+                     new String[] { "\u3031\u0308", "\uD83D\uDC66" });
+
+    // ÷ 3031 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u00AD",
+                     new String[] { "\u3031\u00AD" });
+
+    // ÷ 3031 × 0308 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
+                     new String[] { "\u3031\u0308\u00AD" });
+
+    // ÷ 3031 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0300",
+                     new String[] { "\u3031\u0300" });
+
+    // ÷ 3031 × 0308 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
+                     new String[] { "\u3031\u0308\u0300" });
+
+    // ÷ 3031 × 200D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u200D",
+                     new String[] { "\u3031\u200D" });
+
+    // ÷ 3031 × 0308 × 200D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u200D",
+                     new String[] { "\u3031\u0308\u200D" });
+
+    // ÷ 3031 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
+                     new String[] { "\u3031", "\u0061\u2060" });
+
+    // ÷ 3031 × 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
+                     new String[] { "\u3031\u0308", "\u0061\u2060" });
+
+    // ÷ 3031 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 3031 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 3031 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 0041 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0001",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\r",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\r",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\n",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\n",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u000B",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u3031",
+                     new String[] { "\u0041", "\u3031" });
+
+    // ÷ 0041 × 0308 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
+                     new String[] { "\u0041\u0308", "\u3031" });
+
+    // ÷ 0041 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0041",
+                     new String[] { "\u0041\u0041" });
+
+    // ÷ 0041 × 0308 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
+                     new String[] { "\u0041\u0308\u0041" });
+
+    // ÷ 0041 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u003A",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u002C",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u002E",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0030",
+                     new String[] { "\u0041\u0030" });
+
+    // ÷ 0041 × 0308 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
+                     new String[] { "\u0041\u0308\u0030" });
+
+    // ÷ 0041 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u005F",
+                     new String[] { "\u0041\u005F" });
+
+    // ÷ 0041 × 0308 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
+                     new String[] { "\u0041\u0308\u005F" });
+
+    // ÷ 0041 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u05D0",
+                     new String[] { "\u0041\u05D0" });
+
+    // ÷ 0041 × 0308 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
+                     new String[] { "\u0041\u0308\u05D0" });
+
+    // ÷ 0041 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\"",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\"",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0027",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 261D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u261D",
+                     new String[] { "\u0041", "\u261D" });
+
+    // ÷ 0041 × 0308 ÷ 261D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u261D",
+                     new String[] { "\u0041\u0308", "\u261D" });
+
+    // ÷ 0041 ÷ 1F3FB ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDFFB",
+                     new String[] { "\u0041", "\uD83C\uDFFB" });
+
+    // ÷ 0041 × 0308 ÷ 1F3FB ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDFFB",
+                     new String[] { "\u0041\u0308", "\uD83C\uDFFB" });
+
+    // ÷ 0041 ÷ 2764 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u2764",
+                     new String[] { "\u0041", "\u2764" });
+
+    // ÷ 0041 × 0308 ÷ 2764 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u2764",
+                     new String[] { "\u0041\u0308", "\u2764" });
+
+    // ÷ 0041 ÷ 1F466 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83D\uDC66",
+                     new String[] { "\u0041", "\uD83D\uDC66" });
+
+    // ÷ 0041 × 0308 ÷ 1F466 ÷  #  ÷ [0.2] LATIN CAPITA

<TRUNCATED>

[22/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
index 292f2ad..e4b10af 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
@@ -37,12 +37,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %integer
 %final
 %public
@@ -52,22 +53,73 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %xstate AVOID_BAD_URL
 %buffer 255
 
-// UAX#29 WB4. X (Extend | Format)* --> X
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+
+//////////////////////////////////////////////////////////////////////////
+// Begin Emoji Macros - see documentation below, near the EMOJI_TYPE rule
+
+// TODO: Remove this include file when JFlex supports these properties directly (in Unicode 11.0+)
+%include ../../../../../../../../../core/src/data/jflex/UnicodeEmojiProperties.jflex
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+//   \uFE0E (Text Presentation Selector) and \uFE0F (Emoji Presentation Selector) - included in \p{WB:Extend}
+//   - are explicitly excluded here so that we can properly handle Emoji sequences.
+//
+ExtFmtZwjSansPresSel = [[\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]--[\uFE0E\uFE0F]]*
+
+KeyCapBaseChar = [0-9#*]
+KeyCapBaseCharEx = {KeyCapBaseChar} {ExtFmtZwjSansPresSel}
+KeyCap = \u20E3
+KeyCapEx = {KeyCap} {ExtFmtZwjSansPresSel}
+
+// # \u3030 = WAVY DASH; \u303D = PART ALTERNATION MARK
+AccidentalEmoji = [©®™\u3030\u303D]
+EmojiRKAM = ( \p{WB:Regional_Indicator} | {KeyCapBaseChar} | {AccidentalEmoji} | {Emoji_Modifier} )
+
+// Unlike Unicode properties, macros are not allowed in character classes, so we achieve set difference
+// by applying DeMorgan: the expression that matches everything of 'a' not matched by 'b' is: !(!a|b)
+// TODO: Convert this expression to character class difference when JFlex supports the properties directly (in Unicode 11.0+)
+EmojiSansRKAM = !( ! {Emoji} | {EmojiRKAM} )
+
+EmojiChar = ( {Extended_Pictographic} | {EmojiSansRKAM} )
+
+EmojiCharEx         = {EmojiChar}           {ExtFmtZwjSansPresSel}
+EmojiModifierBaseEx = {Emoji_Modifier_Base} {ExtFmtZwjSansPresSel}
+EmojiModifierEx     = {Emoji_Modifier}      {ExtFmtZwjSansPresSel}
+
+EmojiPresentationSelector = \uFE0F
+EmojiCharOrPresSeqOrModSeq = ( \p{WB:ZWJ}* {EmojiCharEx} {EmojiPresentationSelector}? ) | ( ( \p{WB:ZWJ}* {EmojiModifierBaseEx} )? {EmojiModifierEx} )
+TagSpec = [\u{E0020}-\u{E007E}]
+TagTerm = \u{E007F}
+
+// End Emoji Macros
+//////////////////////////////////////////////////////////////////////////
+
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
 //
-HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx   = [\p{WB:HebrewLetter}\p{WB:ALetter}]                       [\p{WB:Format}\p{WB:Extend}]*
-NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx          = \p{WB:Katakana}                                           [\p{WB:Format}\p{WB:Extend}]* 
-MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      [\p{WB:Format}\p{WB:Extend}]* 
-MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       [\p{WB:Format}\p{WB:Extend}]*
-HanEx               = \p{Script:Han}                                            [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx          = \p{Script:Hiragana}                                       [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx       = \p{WB:Single_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx       = \p{WB:Double_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator}                                  [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx    = \p{LB:Complex_Context}                                    [\p{WB:Format}\p{WB:Extend}]*
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] {ExtFmtZwj}
+AHLetterEx          = [\p{WB:ALetter}\p{WB:Hebrew_Letter}]                      {ExtFmtZwj}
+NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        {ExtFmtZwj}
+KatakanaEx          = \p{WB:Katakana}                                           {ExtFmtZwj} 
+MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      {ExtFmtZwj} 
+MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         {ExtFmtZwj}
+ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       {ExtFmtZwj}
+HanEx               = \p{Script:Han}                                            {ExtFmtZwj}
+HiraganaEx          = \p{Script:Hiragana}                                       {ExtFmtZwj}
+SingleQuoteEx       = \p{WB:Single_Quote}                                       {ExtFmtZwj}
+DoubleQuoteEx       = \p{WB:Double_Quote}                                       {ExtFmtZwj}
+HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      {ExtFmtZwj}
+RegionalIndicatorEx = \p{WB:Regional_Indicator}                                 {ExtFmtZwj}
+ComplexContextEx    = \p{LB:Complex_Context}                                    {ExtFmtZwj}
+
 
 // URL and E-mail syntax specifications:
 //
@@ -174,18 +226,28 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
    */
   public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
   
+  /** Ideographic token type */
   public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
   
+  /** Hiragana token type */
   public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;
   
+  /** Katakana token type */
   public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;
   
+  /** Hangul token type */
   public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;
   
+  /** Email token type */
   public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;
   
+  /** URL token type */
   public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;
 
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = UAX29URLEmailTokenizer.EMOJI;
+
+  /** Character count processed so far */
   public final int yychar()
   {
     return yychar;
@@ -213,11 +275,11 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
 
 <YYINITIAL, AVOID_BAD_URL> {
 
-// UAX#29 WB1.   sot   ÷
-//        WB2.     ÷   eot
+// UAX#29 WB1.    sot ÷ Any
+//        WB2.    Any ÷ eot
 //
   <<EOF>> { return YYEOF; }
-
+  
   {URL}   { yybegin(YYINITIAL); return URL_TYPE; }
 
   // LUCENE-5391: Don't recognize no-scheme domain-only URLs with a following alphanumeric character
@@ -244,14 +306,61 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
 
   {EMAIL} { yybegin(YYINITIAL); return EMAIL_TYPE; }
 
-  // UAX#29 WB8.   Numeric × Numeric
-  //        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-  //        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-  //        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-  //        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+
+  // Instead of these: UAX#29 WB3c. ZWJ × (Glue_After_Zwj | EBG)
+  //                          WB14. (E_Base | EBG) × E_Modifier
+  //                          WB15. ^ (RI RI)* RI × RI
+  //                          WB16. [^RI] (RI RI)* RI × RI
+  //
+  // We use the "emoji_sequence" rule from http://www.unicode.org/reports/tr51/tr51-14.html (Unicode 11.0)
+  // and the Emoji data from http://unicode.org/Public/emoji/11.0/emoji-data.txt (in included file UnicodeEmojiProperties.jflex)
+  // 
+  // emoji_sequence :=
+  //    Top-level EBNF           Expanded #1                       Expanded #2                       Expanded #3
+  //    ---------------------    ----------------------------      -----------------------------     ----------------------------------------------
+  //      emoji_core_sequence      emoji_combining_sequence          emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_keycap_sequence           | [0-9#*] \u{FE0F 20E3}      [1]
+  //                             | emoji_modifier_sequence                                           | \p{Emoji_Modifier_Base} \p{Emoji_Modifier}
+  //                             | emoji_flag_sequence                                               | \p{WB:Regional_Indicator}{2}               )
+  //
+  //    | emoji_zwj_sequence       emoji_zwj_element                 emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+  //                             ( ZWJ emoji_zwj_element )+                                          ( \p{WB:ZWJ} ^^ )+
+  // 
+  //    | emoji_tag_sequence     tag_base                            emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+  //                             tag_spec                                                            [\u{E0020}-\u{E007E}]+
+  //                             tag_term                                                            \u{E007F}
+  //
+  // [1] https://unicode.org/Public/emoji/11.0/emoji-test.txt includes key cap sequences 
+  //     WITHOUT \uFE0F (emoji presentation indicator), annotating them as "non-fully-qualified";
+  //     TR#51 says about non-fully-qualified *ZWJ sequences* that implementations may
+  //     choose whether to support them for segmentation.  This implementation will
+  //     recognize /[0-9#*]\u20E3/ - i.e. without \uFE0F - as Emoji. 
+  //
+  // See also: http://www.unicode.org/L2/L2016/16315-handling-seg-emoji.pdf
+  //           https://docs.google.com/document/d/1yDZ5TUZNVVKaM9zYCCLbRIAKGNZANsAGl0bcNzGGvn8
+  //
+  //     In particular, the above docs recommend a modified UAX#29 WB3c rule (covered by TR#51's "emoji_zwj_sequence"):
+  //
+  //         WB3c′ ZWJ × (Extended_Pictographic | EmojiNRK)
+  //
+    {EmojiCharOrPresSeqOrModSeq} ( ( \p{WB:ZWJ} {EmojiCharOrPresSeqOrModSeq} )* | {TagSpec}+ {TagTerm} ) 
+  | {KeyCapBaseCharEx} {EmojiPresentationSelector}? {KeyCapEx} 
+  | {RegionalIndicatorEx}{2} 
+    { yybegin(YYINITIAL); return EMOJI_TYPE; }
+
+  // UAX#29 WB8.    Numeric × Numeric
+  //        WB11.   Numeric (MidNum | MidNumLetQ) × Numeric
+  //        WB12.   Numeric × (MidNum | MidNumLetQ) Numeric
+  //        WB13a.  (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+  //        WB13b.  ExtendNumLet × (AHLetter | Numeric | Katakana)
   //
   {ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
-    {  yybegin(YYINITIAL); return NUMERIC_TYPE; }
+    { yybegin(YYINITIAL); return NUMERIC_TYPE; }
 
   // subset of the below for typing purposes only!
   {HangulEx}+
@@ -260,32 +369,32 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   {KatakanaEx}+
     { yybegin(YYINITIAL); return KATAKANA_TYPE; }
 
-  // UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-  //        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-  //        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-  //        WB7a.  Hebrew_Letter × Single_Quote
-  //        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
-  //        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
-  //        WB9.   (ALetter | Hebrew_Letter) × Numeric
-  //        WB10.  Numeric × (ALetter | Hebrew_Letter)
-  //        WB13.  Katakana × Katakana
-  //        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-  //        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+  // UAX#29 WB5.    AHLetter × AHLetter
+  //        WB6.    AHLetter × (MidLetter | MidNumLetQ) AHLetter
+  //        WB7.    AHLetter (MidLetter | MidNumLetQ) × AHLetter
+  //        WB7a.   Hebrew_Letter × Single_Quote
+  //        WB7b.   Hebrew_Letter × Double_Quote Hebrew_Letter
+  //        WB7c.   Hebrew_Letter Double_Quote × Hebrew_Letter
+  //        WB9.    AHLetter × Numeric
+  //        WB10.   Numeric × AHLetter
+  //        WB13.   Katakana × Katakana
+  //        WB13a.  (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+  //        WB13b.  ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
   //
-  {ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                       | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+  {ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                       | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                        )+
                      )
-  ({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                       | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+  ({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                       | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                        )+
                      )
   )*
-  {ExtendNumLetEx}*
+  {ExtendNumLetEx}* 
     { yybegin(YYINITIAL); return WORD_TYPE; }
 
 
@@ -297,7 +406,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   //    annex.  That means that satisfactory treatment of languages like Chinese
   //    or Thai requires special handling.
   //
-  // In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
+  // In Unicode 9.0, only one character has the \p{Line_Break = Contingent_Break}
   // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
   //
   // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -310,18 +419,15 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   //
   {ComplexContextEx}+ { yybegin(YYINITIAL); return SOUTH_EAST_ASIAN_TYPE; }
 
-  // UAX#29 WB14.  Any ÷ Any
+  // UAX#29 WB999.  Any ÷ Any
   //
   {HanEx} { yybegin(YYINITIAL); return IDEOGRAPHIC_TYPE; }
   {HiraganaEx} { yybegin(YYINITIAL); return HIRAGANA_TYPE; }
 
-
-  // UAX#29 WB3.   CR × LF
-  //        WB3a.  (Newline | CR | LF) ÷
-  //        WB3b.  ÷ (Newline | CR | LF)
-  //        WB13c. Regional_Indicator × Regional_Indicator
-  //        WB14.  Any ÷ Any
+  // UAX#29 WB3.    CR × LF
+  //        WB3a.   (Newline | CR | LF) ÷
+  //        WB3b.   ÷ (Newline | CR | LF)
+  //        WB999.  Any ÷ Any
   //
-  {RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
-    { yybegin(YYINITIAL); /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+  [^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */ }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
index 7f9227f..9295e1c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -341,7 +341,7 @@ class WikipediaTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -419,11 +419,11 @@ class WikipediaTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -575,28 +575,29 @@ final void reset() {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -820,199 +821,245 @@ final void reset() {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 47: break;
-        case 2: 
-          { positionInc = 1; return ALPHANUM;
-          }
-        case 48: break;
-        case 3: 
-          { positionInc = 1; return CJ;
-          }
-        case 49: break;
-        case 4: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 50: break;
-        case 5: 
-          { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 51: break;
-        case 6: 
-          { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 52: break;
-        case 7: 
-          { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 53: break;
-        case 8: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
-          }
-        case 54: break;
-        case 9: 
-          { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
-          }
-        case 55: break;
-        case 10: 
-          { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 56: break;
-        case 11: 
-          { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 57: break;
-        case 12: 
-          { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
-          }
-        case 58: break;
-        case 13: 
-          { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 59: break;
-        case 14: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
-          }
-        case 60: break;
-        case 15: 
-          { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 61: break;
-        case 16: 
-          { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 62: break;
-        case 17: 
-          { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
-          }
-        case 63: break;
-        case 18: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
-          }
-        case 64: break;
-        case 19: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
-          }
-        case 65: break;
-        case 20: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 66: break;
-        case 21: 
-          { yybegin(STRING); return currentTokType;/*pipe*/
-          }
-        case 67: break;
-        case 22: 
-          { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 68: break;
-        case 23: 
-          { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 69: break;
-        case 24: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 70: break;
-        case 25: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 71: break;
-        case 26: 
-          { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 72: break;
-        case 27: 
-          { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 73: break;
-        case 28: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 74: break;
-        case 29: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 75: break;
-        case 30: 
-          { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 76: break;
-        case 31: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
-          }
-        case 77: break;
-        case 32: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 78: break;
-        case 33: 
-          { positionInc = 1; return APOSTROPHE;
-          }
-        case 79: break;
-        case 34: 
-          { positionInc = 1; return HOST;
-          }
-        case 80: break;
-        case 35: 
-          { positionInc = 1; return NUM;
-          }
-        case 81: break;
-        case 36: 
-          { positionInc = 1; return COMPANY;
-          }
-        case 82: break;
-        case 37: 
-          { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 83: break;
-        case 38: 
-          { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
-          }
-        case 84: break;
-        case 39: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
-          }
-        case 85: break;
-        case 40: 
-          { positionInc = 1; return ACRONYM;
-          }
-        case 86: break;
-        case 41: 
-          { positionInc = 1; return EMAIL;
-          }
-        case 87: break;
-        case 42: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
-          }
-        case 88: break;
-        case 43: 
-          { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
-          }
-        case 89: break;
-        case 44: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 90: break;
-        case 45: 
-          { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 91: break;
-        case 46: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 92: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            return YYEOF;
-          } 
-          else {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+        return YYEOF;
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 47: break;
+          case 2: 
+            { positionInc = 1; return ALPHANUM;
+            } 
+            // fall through
+          case 48: break;
+          case 3: 
+            { positionInc = 1; return CJ;
+            } 
+            // fall through
+          case 49: break;
+          case 4: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 50: break;
+          case 5: 
+            { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 51: break;
+          case 6: 
+            { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 52: break;
+          case 7: 
+            { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 53: break;
+          case 8: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+            } 
+            // fall through
+          case 54: break;
+          case 9: 
+            { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+            } 
+            // fall through
+          case 55: break;
+          case 10: 
+            { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 56: break;
+          case 11: 
+            { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 57: break;
+          case 12: 
+            { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
+            } 
+            // fall through
+          case 58: break;
+          case 13: 
+            { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 59: break;
+          case 14: 
+            { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 60: break;
+          case 15: 
+            { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 61: break;
+          case 16: 
+            { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 62: break;
+          case 17: 
+            { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
+            } 
+            // fall through
+          case 63: break;
+          case 18: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
+            } 
+            // fall through
+          case 64: break;
+          case 19: 
+            { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
+            } 
+            // fall through
+          case 65: break;
+          case 20: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 66: break;
+          case 21: 
+            { yybegin(STRING); return currentTokType;/*pipe*/
+            } 
+            // fall through
+          case 67: break;
+          case 22: 
+            { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 68: break;
+          case 23: 
+            { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 69: break;
+          case 24: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 70: break;
+          case 25: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 71: break;
+          case 26: 
+            { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 72: break;
+          case 27: 
+            { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 73: break;
+          case 28: 
+            { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 74: break;
+          case 29: 
+            { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 75: break;
+          case 30: 
+            { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 76: break;
+          case 31: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
+            } 
+            // fall through
+          case 77: break;
+          case 32: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 78: break;
+          case 33: 
+            { positionInc = 1; return APOSTROPHE;
+            } 
+            // fall through
+          case 79: break;
+          case 34: 
+            { positionInc = 1; return HOST;
+            } 
+            // fall through
+          case 80: break;
+          case 35: 
+            { positionInc = 1; return NUM;
+            } 
+            // fall through
+          case 81: break;
+          case 36: 
+            { positionInc = 1; return COMPANY;
+            } 
+            // fall through
+          case 82: break;
+          case 37: 
+            { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 83: break;
+          case 38: 
+            { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
+            } 
+            // fall through
+          case 84: break;
+          case 39: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
+            } 
+            // fall through
+          case 85: break;
+          case 40: 
+            { positionInc = 1; return ACRONYM;
+            } 
+            // fall through
+          case 86: break;
+          case 41: 
+            { positionInc = 1; return EMAIL;
+            } 
+            // fall through
+          case 87: break;
+          case 42: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
+            } 
+            // fall through
+          case 88: break;
+          case 43: 
+            { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+            } 
+            // fall through
+          case 89: break;
+          case 44: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 90: break;
+          case 45: 
+            { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 91: break;
+          case 46: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 92: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
index cf6c65a..758d5d2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
@@ -499,7 +499,7 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
 
     String randomHtmlishString2 // Don't create a comment (disallow "<!--") and don't include a closing ">"
         = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--","__");
-    String unclosedAngleBangNonCDATA = "<!" + randomHtmlishString1 +"-[CDATA[";
+    String unclosedAngleBangNonCDATA = "<!" + randomHtmlishString2 +"-[CDATA[";
 
     String[] testGold = {
         "one<![CDATA[<one><two>three<four></four></two></one>]]>two",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
index b3b0ce1..507eb09 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
@@ -361,14 +361,14 @@ public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase {
 
     StringBuilder bToken = new StringBuilder();
     // exact max length:
-    for(int i=0;i<StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;i++) {
+    for(int i=0;i<UAX29URLEmailAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;i++) {
       bToken.append('b');
     }
 
     String bString = bToken.toString();
     // first bString is exact max default length; next one is 1 too long
     String input = "x " + bString + " " + bString + "b";
-    assertAnalyzesTo(a, input.toString(), new String[] {"x", bString, bString, "b"});
+    assertAnalyzesTo(a, input, new String[] {"x", bString, bString, "b"});
     a.close();
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
index d9d8381..76c5d55 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
@@ -467,7 +467,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
+    WordBreakTestUnicode_9_0_0 wordBreakTest = new WordBreakTestUnicode_9_0_0();
     wordBreakTest.test(a);
   }
   
@@ -545,6 +545,80 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
 
+  /** simple emoji: three occurrences of U+1F4A9, one of them separated by a space */
+  public void testEmoji() throws Exception {
+    // Each U+1F4A9 is expected to become its own <EMOJI> token, whether or not
+    // whitespace separates it from its neighbour.
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩 💩💩",
+        new String[] { "💩", "💩", "💩" },
+        new String[] { "<EMOJI>", "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence */
+  public void testEmojiSequence() throws Exception {
+    // U+1F469 U+200D U+2764 U+FE0F U+200D U+1F469: the whole ZWJ sequence is
+    // expected to be kept together as a single <EMOJI> token.
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👩‍❤️‍👩",
+        new String[] { "👩‍❤️‍👩" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence with fitzpatrick modifier */
+  public void testEmojiSequenceWithModifier() throws Exception {
+    // U+1F468 U+1F3FC U+200D U+2695 U+FE0F: base + skin tone modifier + ZWJ +
+    // second emoji is expected to be kept together as a single <EMOJI> token.
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👨🏼‍⚕️",
+        new String[] { "👨🏼‍⚕️" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** regional indicator */
+  public void testEmojiRegionalIndicator() throws Exception {
+    // U+1F1FA U+1F1F8 twice with no separator: regional indicators are expected
+    // to be paired, yielding two <EMOJI> tokens rather than one or four.
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🇺🇸🇺🇸",
+        new String[] { "🇺🇸", "🇺🇸" },
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** variation sequence */
+  public void testEmojiVariationSequence() throws Exception {
+    // keycap sequences: '#' / '3' followed by U+FE0F (emoji presentation
+    // selector) and U+20E3 (combining enclosing keycap)
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#️⃣",
+        new String[] { "#️⃣" },
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3️⃣",
+        new String[] { "3️⃣",},
+        new String[] { "<EMOJI>" });
+
+    // text presentation sequences
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",
+        new String[] { },
+        new String[] { });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
+        new String[] { "3\uFE0E",},
+        new String[] { "<NUM>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
+        new String[] { "\u2B55",},
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
+        new String[] { "\u2B55", "\u200D\u2B55"},
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** emoji tag sequence */
+  public void testEmojiTagSequence() throws Exception {
+    // U+1F3F4 followed by the tag characters U+E0067 U+E0062 U+E0065 U+E006E
+    // U+E0067 ("gbeng") and U+E007F (cancel tag): expected to be one <EMOJI> token.
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🏴󠁧󠁢󠁥󠁮󠁧󠁿",
+        new String[] { "🏴󠁧󠁢󠁥󠁮󠁧󠁿" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** emoji directly adjacent to other scripts, with no whitespace in between */
+  public void testEmojiTokenization() throws Exception {
+    // simple emoji around latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "poo💩poo",
+        new String[] { "poo", "💩", "poo" },
+        new String[] { "<ALPHANUM>", "<EMOJI>", "<ALPHANUM>" });
+    // simple emoji around non-latin (U+4E2D U+570B between two U+1F4A9)
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩中國💩",
+        new String[] { "💩", "中", "國", "💩" },
+        new String[] { "<EMOJI>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<EMOJI>" });
+  }
+
+  /** Runs the checks in EmojiTokenizationTestUnicode_11_0 against {@code a}. */
+  public void testUnicodeEmojiTests() throws Exception {
+    new EmojiTokenizationTestUnicode_11_0().test(a);
+  }
+
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index 9efe15f..0bbd19a 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -2388,7 +2388,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
 
   <!-- JFlex task -->
   <target name="-install-jflex" unless="jflex.loaded" depends="ivy-availability-check,ivy-configure">
-    <ivy:cachepath organisation="de.jflex" module="jflex" revision="1.6.0"
+    <ivy:cachepath organisation="de.jflex" module="jflex" revision="1.7.0"
                    inline="true" conf="default" transitive="true" pathid="jflex.classpath"/>
     <taskdef name="jflex" classname="jflex.anttask.JFlexTask" classpathref="jflex.classpath"/>
     <property name="jflex.loaded" value="true"/>
@@ -2645,7 +2645,11 @@ The following arguments can be provided to ant to alter its behaviour and target
     <attribute name="dir"/>
     <attribute name="name"/>
     <sequential>
-      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+      <!-- The default skeleton is specified here to work around a JFlex ant task bug:    -->
+      <!-- invocations with a non-default skeleton will cause following invocations to    -->
+      <!-- use the same skeleton, though not specified, unless the default is configured. -->
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on"
+             skeleton="${common.dir}/core/src/data/jflex/skeleton.default"/>
     </sequential>
   </macrodef>
 
@@ -2653,20 +2657,13 @@ The following arguments can be provided to ant to alter its behaviour and target
     <attribute name="dir"/>
     <attribute name="name"/>
     <sequential>
-      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
       <!-- LUCENE-5897: Disallow scanner buffer expansion -->
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
-                     replace="" flags="s" />
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on"
+             skeleton="${common.dir}/core/src/data/jflex/skeleton.disable.buffer.expansion.txt"/>
+      <!-- Since the ZZ_BUFFERSIZE declaration is generated rather than in the skeleton, we have to transform it here. -->
       <replaceregexp file="@{dir}/@{name}.java"
                      match="private static final int ZZ_BUFFERSIZE ="
                      replace="private int ZZ_BUFFERSIZE ="/>
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="int requested = zzBuffer.length - zzEndRead;"
-                     replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="(zzFinalHighSurrogate = 1;)(\r?\n)"
-                     replace="\1\2          if (totalRead == 1) { return true; }\2"/>
     </sequential>
   </macrodef>
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex b/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
new file mode 100644
index 0000000..c631dee
--- /dev/null
+++ b/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file was automatically generated by getUnicodeEmojiProperties.pl
+// from: http://unicode.org/Public/emoji/11.0/emoji-data.txt 
+
+Emoji = [\u{23}\u{2A}\u{30}-\u{39}\u{A9}\u{AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2604}\u{260E}\u{2611}\u{2614}-\u{2615}\u{2618}\u{261D}\u{2620}\u{2622}-\u{2623}\u{2626}\u{262A}\u{262E}-\u{262F}\u{2638}-\u{263A}\u{2640}\u{2642}\u{2648}-\u{2653}\u{265F}-\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267E}-\u{267F}\u{2692}-\u{2697}\u{2699}\u{269B}-\u{269C}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26B0}-\u{26B1}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26C8}\u{26CE}-\u{26CF}\u{26D1}\u{26D3}-\u{26D4}\u{26E9}-\u{26EA}\u{26F0}-\u{26F5}\u{26F7}-\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270D}\u{270F}\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{
 2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E6}-\u{1F1FF}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}-\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F7}-\u{1F4FD}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F56F}-\u{1F570}\u{1F573}-\u{1F57A}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}-\u{1F596}\u{1F5A4}-\u{1F5A5}\u{1F5A8}\u{1F5B1}-\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5E8}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D2}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6EC}\u{1F6F0}\u{1F6F3}-\u{1F6F9}\u{1F910}-\u{1F93A}\u{1F93C}-\u{1F93E}\u{1F940}-\u{1F945}\u{1F947}-\u{1F970}\u{1F973}-\u{1F976}\u{1F97A}\u{1F97C}-\u{1F9A2}\u{1F9B0}-\u{1F9B9}\u{1F9C0}-\u{1F9C2}\u{1F9D0}-\u{1F9FF}]
+Emoji_Modifier = [\u{1F3FB}-\u{1F3FF}]
+Emoji_Modifier_Base = [\u{261D}\u{26F9}\u{270A}-\u{270D}\u{1F385}\u{1F3C2}-\u{1F3C4}\u{1F3C7}\u{1F3CA}-\u{1F3CC}\u{1F442}-\u{1F443}\u{1F446}-\u{1F450}\u{1F466}-\u{1F469}\u{1F46E}\u{1F470}-\u{1F478}\u{1F47C}\u{1F481}-\u{1F483}\u{1F485}-\u{1F487}\u{1F4AA}\u{1F574}-\u{1F575}\u{1F57A}\u{1F590}\u{1F595}-\u{1F596}\u{1F645}-\u{1F647}\u{1F64B}-\u{1F64F}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F6C0}\u{1F6CC}\u{1F918}-\u{1F91C}\u{1F91E}-\u{1F91F}\u{1F926}\u{1F930}-\u{1F939}\u{1F93D}-\u{1F93E}\u{1F9B5}-\u{1F9B6}\u{1F9B8}-\u{1F9B9}\u{1F9D1}-\u{1F9DD}]
+Extended_Pictographic = [\u{A9}\u{AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{2388}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2605}\u{2607}-\u{2612}\u{2614}-\u{2685}\u{2690}-\u{2705}\u{2708}-\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2767}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F000}-\u{1F0FF}\u{1F10D}-\u{1F10F}\u{1F12F}\u{1F16C}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1AD}-\u{1F1E5}\u{1F201}-\u{1F20F}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F23C}-\u{1F23F}\u{1F249}-\u{1F3FA}\u{1F400}-\u{1F53D}\u{1F546}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{1F774}-\u{1F77F}\u{1F7D5}-\u{1F7FF}\u{1F80C}-\u{1F80F}\u{1F848}-\u{1F84F}\u{1F85A}-\u{1F85F}\u{1F888}-\u{1F88F}\u{1F8AE
 }-\u{1F8FF}\u{1F90C}-\u{1F93A}\u{1F93C}-\u{1F945}\u{1F947}-\u{1FFFD}]
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl b/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
new file mode 100644
index 0000000..e818b64
--- /dev/null
+++ b/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
@@ -0,0 +1,168 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+# Split this script's own path so the generated file can be written next to it
+# and so the usage message and generated header can name the script.
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+
+# The version is required and must be exactly of the form X.Y.  It is
+# interpolated verbatim into the download URL below, so the pattern is anchored
+# at both ends to reject values that merely contain something like "1.2".
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\A\d+\.\d+\z/) {
+    print STDERR "Usage: $script_name -v <version>\n";
+    print STDERR "\tversion must be of the form X.Y, e.g. 9.0\n"
+        if ($version);
+    exit 1;
+}
+my $emoji_data_url = "http://unicode.org/Public/emoji/$version/emoji-data.txt";
+my $output_filename = "UnicodeEmojiProperties.jflex";
+
+# Text written at the top of the generated file: the license followed by a note
+# naming this script and the data URL (both interpolated into the heredoc).
+my $header =<<"__HEADER__";
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file was automatically generated by ${script_name}
+// from: ${emoji_data_url} 
+
+__HEADER__
+
+# $property_ranges is filled in by parse_emoji_data_file: property name =>
+# array of alternating (start,end) code points.  Only the properties named in
+# $wanted_properties are collected.
+my $property_ranges = {};
+my $wanted_properties = { 'Emoji' => 1, 'Emoji_Modifier' => 1, 'Emoji_Modifier_Base' => 1, 'Extended_Pictographic' => 1 };
+
+parse_emoji_data_file($emoji_data_url, $property_ranges, $wanted_properties);
+
+# Write the result into the same volume/directory as this script.
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+output_jflex_include_file($output_path, $property_ranges);
+
+
+# sub parse_emoji_data_file
+#
+# Downloads the emoji-data.txt file at the given URL and parses it, collecting
+# the code point ranges assigned to each wanted property.  Every data line for
+# a wanted property is used; no filtering by age is performed here.  A range
+# that starts inside, or immediately after, the most recently recorded range
+# for the same property is merged into it, so the result is only fully
+# coalesced when the file lists each property's ranges in ascending order.
+#
+# Parameters:
+#
+#  - Emoji data file URL
+#  - Reference to hash of properties mapped to an array of alternating (start,end) code point ranges
+#    (filled in by this sub)
+#  - Reference to hash of wanted property names
+#
+sub parse_emoji_data_file {
+    my ($url, $prop_ranges, $wanted_props) = @_;
+    my $content = get_URL_content($url);
+    print STDERR "Parsing '$url'...";
+    for my $line (split /\r?\n/, $content) {
+        ## 231A..231B    ; Emoji_Presentation   #  1.1  [2] (⌚..⌛)    watch..hourglass done
+        ## 1F9C0         ; Emoji_Presentation   #  8.0  [1] (🧀)       cheese wedge
+        ## 1FA00..1FA5F  ; Extended_Pictographic#   NA [96] (🨀️..🩟️)    <reserved-1FA00>..<reserved-1FA5F>
+        # Lines that are not data lines (comments, blank lines) are skipped.
+        next unless $line =~ /^([0-9A-F]{4,5})(?:\.\.([0-9A-F]{4,5}))?\s*;\s*([^\s#]+)/;
+        my ($start, $end, $prop) = ($1, $2, $3);
+        next unless defined($wanted_props->{$prop});  # Skip unless we want ranges for this property
+
+        $end = $start unless defined($end);           # Single code point: range of length one
+        my $start_dec = hex $start;
+        my $end_dec = hex $end;
+        my $ranges = ($prop_ranges->{$prop} ||= []);
+        if (scalar(@$ranges) == 0 || $start_dec > $ranges->[-1] + 1) { # Can't merge range with previous range
+            push @$ranges, $start_dec, $end_dec;
+        } elsif ($end_dec > $ranges->[-1]) {
+            # Adjacent or overlapping: extend the previous range, but never
+            # shrink it if the new range ends before the previous one does.
+            $ranges->[-1] = $end_dec;
+        }
+    }
+    print STDERR "done.\n";
+}
+
+# sub get_URL_content
+#
+# Issues an HTTP GET for the given URL and returns the response content.
+# Progress is reported on STDERR.  If the request is not successful, the
+# response status line is printed to STDERR and the script exits with status 1.
+#
+# Parameter:
+#
+#  - URL to get content for
+#
+sub get_URL_content {
+    my ($url) = @_;
+    print STDERR "Retrieving '$url'...";
+    my $response = LWP::UserAgent->new->request(HTTP::Request->new(GET => $url));
+    if (not $response->is_success) {
+        print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
+        exit 1;
+    }
+    print STDERR "done.\n";
+    return $response->content;
+}
+
+
+# sub output_jflex_include_file
+#
+# Writes the JFlex include file: the file-level $header text, then one line per
+# property of the form "Name = [\u{X}-\u{Y}...]" (properties sorted by name),
+# then a trailing blank line.  Dies if the file cannot be opened or closed.
+#
+# Parameters:
+#
+#  - Output path
+#  - Reference to hash mapping properties to an array of alternating (start,end) codepoint ranges
+#
+sub output_jflex_include_file {
+    my ($path, $prop_ranges) = @_;
+
+    # Three-arg open with low-precedence "or": with "||" the die would bind to
+    # the filename string and could never fire on a failed open.
+    open(my $out, '>', $path)
+        or die "Error opening '$path' for writing: $!";
+
+    print STDERR "Writing '$path'...";
+
+    print $out $header;
+
+    for my $prop (sort keys %$prop_ranges) {
+        my $ranges = $prop_ranges->{$prop};
+        print $out "$prop = [";
+        for (my $index = 0 ; $index < scalar(@$ranges) ; $index += 2) {
+            my ($first, $last) = @$ranges[$index, $index + 1];
+            printf $out "\\u{%X}", $first;
+            # A single code point is written without the "-end" part.
+            printf $out "-\\u{%X}", $last if ($last > $first);
+        }
+        print $out "]\n";
+    }
+
+    print $out "\n";
+    # Buffered write errors only surface at close, so check it.
+    close($out)
+        or die "Error closing '$path': $!";
+    print STDERR "done.\n";
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/core/src/data/jflex/skeleton.default
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/skeleton.default b/lucene/core/src/data/jflex/skeleton.default
new file mode 100644
index 0000000..9e08fbb
--- /dev/null
+++ b/lucene/core/src/data/jflex/skeleton.default
@@ -0,0 +1,342 @@
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+--- private static final int ZZ_BUFFERSIZE = ...;
+
+  /** lexical states */
+---  lexical states, charmap
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unknown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+--- isFinal list
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true iff the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+  
+  /** 
+   * The number of occupied positions in zzBuffer beyond zzEndRead.
+   * When a lead/high surrogate has been read from the input stream
+   * into the final zzBuffer position, this will have a value of 1;
+   * otherwise, it will have a value of 0.
+   */
+  private int zzFinalHighSurrogate = 0;
+
+--- user class code
+
+--- constructor declaration
+
+
+  /**
+   * Refills the input buffer.
+   *
+   * Text before zzStartRead is dropped by shifting the remaining buffer
+   * content to index 0, the buffer is doubled in size when zzCurrentPos
+   * has reached its end, and the free space after zzEndRead is then
+   * filled from zzReader.
+   *
+   * @return      <code>false</code> if at least one new character was read,
+   *              <code>true</code> if the reader reported end of stream.
+   *
+   * @exception   java.io.IOException  if any I/O-Error occurs, or if the
+   *                                   reader returns 0 characters
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can); a held-back high surrogate is
+       counted back into the buffered range before the content is shifted */
+    if (zzStartRead > 0) {
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+    /* is the buffer big enough? */
+    if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) {
+      /* if not: blow it up (double the capacity); a held-back high
+         surrogate becomes part of the buffered range again */
+      char newBuffer[] = new char[zzBuffer.length*2];
+      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+      zzBuffer = newBuffer;
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+    }
+
+    /* fill the buffer with new input */
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. In that case a trailing
+         high surrogate is excluded from the readable range and remembered
+         in zzFinalHighSurrogate until the next refill. */
+      if (numRead == requested) {
+        if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+          --zzEndRead;
+          zzFinalHighSurrogate = 1;
+        }
+      }
+      /* potentially more input available */
+      return false;
+    }
+
+    /* numRead < 0 ==> end of stream */
+    return true;
+  }
+
+    
+  /**
+   * Closes the input stream.
+   *
+   * The scanner is marked as being at end of file and the buffered input
+   * is invalidated before the reader is closed. A <code>null</code> reader
+   * is tolerated.
+   *
+   * @exception   java.io.IOException  if closing the reader fails
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    if (zzReader != null)
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>YYINITIAL</tt>.
+   *
+   * Internal scan buffer is resized down to its initial length
+   * (<tt>ZZ_BUFFERSIZE</tt>), if it has grown.
+   *
+   * @param reader   the new input stream
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    zzEndRead = zzStartRead = 0;
+    zzCurrentPos = zzMarkedPos = 0;
+    zzFinalHighSurrogate = 0;
+    yyline = yychar = yycolumn = 0;
+    zzLexicalState = YYINITIAL;
+    if (zzBuffer.length > ZZ_BUFFERSIZE)
+      zzBuffer = new char[ZZ_BUFFERSIZE];
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   *
+   * @return the value last stored in zzLexicalState
+   */
+  public final int yystate() {
+    return zzLexicalState;
+  }
+
+
+  /**
+   * Enters a new lexical state.
+   *
+   * The value is only stored; it is not validated here.
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   *
+   * A new String is created from the buffer on every call, covering the
+   * positions from zzStartRead (inclusive) to zzMarkedPos (exclusive).
+   *
+   * @return the matched text
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the
+   * matched text.
+   *
+   * It is equivalent to yytext().charAt(pos), but faster, because the
+   * buffer is indexed directly and no String is created. This method
+   * itself does not check <tt>pos</tt> against yylength().
+   *
+   * @param pos the position of the character to fetch.
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   *
+   * The length is the distance between zzStartRead and zzMarkedPos, i.e.
+   * it is counted in <code>char</code> positions of the scan buffer.
+   *
+   * @return the number of chars in the matched text
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a wellformed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+--- zzScanError declaration
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+--- throws clause
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method.
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+--- yypushback decl (contains zzScanError exception)
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+--- zzDoEOF
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+--- yylex declaration
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+--- local declarations
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+--- start admin (line, char, col count)
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+--- start admin (lexstate etc)
+
+      zzForAction: {
+        while (true) {
+    
+--- next input, line, col, char count, next transition, isFinal action
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+--- line count update
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+--- char count update
+
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+--- eofvalue
+      }
+      else {
+--- actions
+          default:
+--- no match
+        }
+      }
+    }
+  }
+
+--- main
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
new file mode 100644
index 0000000..a9dabcf
--- /dev/null
+++ b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
@@ -0,0 +1,348 @@
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+--- private static final int ZZ_BUFFERSIZE = ...;
+
+  /** lexical states */
+---  lexical states, charmap
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unknown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+--- isFinal list
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true iff the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+  
+  /** 
+   * The number of occupied positions in zzBuffer beyond zzEndRead.
+   * When a lead/high surrogate has been read from the input stream
+   * into the final zzBuffer position, this will have a value of 1;
+   * otherwise, it will have a value of 0.
+   */
+  private int zzFinalHighSurrogate = 0;
+
+--- user class code
+
+--- constructor declaration
+
+/* -------------------------------------------------------------------------------- */
+/* Begin Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+
+  /**
+   * Refills the input buffer.
+   *
+   * Text before zzStartRead is dropped by shifting the remaining buffer
+   * content to index 0, and the free space after zzEndRead is then filled
+   * from zzReader. The buffer is never enlarged by this method.
+   *
+   * @return      <code>false</code> if new input was read;
+   *              <code>true</code> if the buffer has no free position left,
+   *              if only a single high surrogate could be read into the
+   *              last free position, or if the reader reported end of stream.
+   *
+   * @exception   java.io.IOException  if any I/O-Error occurs, or if the
+   *                                   reader returns 0 characters
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can); a held-back high surrogate is
+       counted back into the buffered range before the content is shifted */
+    if (zzStartRead > 0) {
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+
+    /* fill the buffer with new input; the position of a held-back high
+       surrogate is not available for reading, and when no free position
+       is left the method reports that no new input is available instead
+       of growing the buffer */
+    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
+    if (requested == 0) {
+      return true;
+    }
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+        if (numRead == requested) { // We might have requested too few chars to encode a full Unicode character.
+          --zzEndRead;
+          zzFinalHighSurrogate = 1;
+          if (numRead == 1) {       // Only the held-back surrogate was read: zzEndRead is unchanged
+            return true;
+          }
+        } else {                    // There is room in the buffer for at least one more char
+          int c = zzReader.read();  // Expecting to read a low surrogate char
+          if (c == -1) {            // End of stream directly after the high surrogate
+            return true;
+          } else {
+            zzBuffer[zzEndRead++] = (char)c;
+            return false;
+          }
+        }
+      }
+      /* potentially more input available */
+      return false;
+    }
+
+    /* numRead < 0 ==> end of stream */
+    return true;
+  }
+
+/* End Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+/* ------------------------------------------------------------------------------ */
+    
+  /**
+   * Closes the input stream.
+   *
+   * The scanner is marked as being at end of file and the buffered input
+   * is invalidated before the reader is closed. A <code>null</code> reader
+   * is tolerated.
+   *
+   * @exception   java.io.IOException  if closing the reader fails
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    if (zzReader != null)
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>YYINITIAL</tt>.
+   *
+   * Internal scan buffer is resized down to its initial length
+   * (<tt>ZZ_BUFFERSIZE</tt>), if it has grown.
+   *
+   * @param reader   the new input stream
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    zzEndRead = zzStartRead = 0;
+    zzCurrentPos = zzMarkedPos = 0;
+    zzFinalHighSurrogate = 0;
+    yyline = yychar = yycolumn = 0;
+    zzLexicalState = YYINITIAL;
+    if (zzBuffer.length > ZZ_BUFFERSIZE)
+      zzBuffer = new char[ZZ_BUFFERSIZE];
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   *
+   * @return the value last stored in zzLexicalState
+   */
+  public final int yystate() {
+    return zzLexicalState;
+  }
+
+
+  /**
+   * Enters a new lexical state.
+   *
+   * The value is only stored; it is not validated here.
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   *
+   * A new String is created from the buffer on every call, covering the
+   * positions from zzStartRead (inclusive) to zzMarkedPos (exclusive).
+   *
+   * @return the matched text
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the
+   * matched text.
+   *
+   * It is equivalent to yytext().charAt(pos), but faster, because the
+   * buffer is indexed directly and no String is created. This method
+   * itself does not check <tt>pos</tt> against yylength().
+   *
+   * @param pos the position of the character to fetch.
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   *
+   * The length is the distance between zzStartRead and zzMarkedPos, i.e.
+   * it is counted in <code>char</code> positions of the scan buffer.
+   *
+   * @return the number of chars in the matched text
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a wellformed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+--- zzScanError declaration
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+--- throws clause
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method.
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+--- yypushback decl (contains zzScanError exception)
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+--- zzDoEOF
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+--- yylex declaration
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+--- local declarations
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+--- start admin (line, char, col count)
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+--- start admin (lexstate etc)
+
+      zzForAction: {
+        while (true) {
+    
+--- next input, line, col, char count, next transition, isFinal action
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+--- line count update
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+--- char count update
+
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+--- eofvalue
+      }
+      else {
+--- actions
+          default:
+--- no match
+        }
+      }
+    }
+  }
+
+--- main
+
+}


[18/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
new file mode 100644
index 0000000..027a1b5
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
@@ -0,0 +1,8276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
+ * from: http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakTest.txt
+ *
+ * WordBreakTest.txt indicates the points in the provided character sequences
+ * at which conforming implementations must and must not break words.  This
+ * class tests for expected token extraction from each of the test sequences
+ * in WordBreakTest.txt, where the expected tokens are those character
+ * sequences bounded by word breaks and containing at least one character
+ * from one of the following character sets:
+ *
+ *    \p{Script = Han}                (From http://www.unicode.org/Public/9.0.0/ucd/Scripts.txt)
+ *    \p{Script = Hiragana}
+ *    \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/9.0.0/ucd/LineBreak.txt)
+ *    \p{WordBreak = ALetter}         (From http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakProperty.txt)
+ *    \p{WordBreak = Hebrew_Letter}
+ *    \p{WordBreak = Katakana}
+ *    \p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
+ *    [\uFF10-\uFF19]                 (Full-width Arabic digits)
+ */
+@Ignore
+public class WordBreakTestUnicode_9_0_0 extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+    // รท 0001 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0001",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
+                     new String[] {  });
+
+    // รท 0001 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\r",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\r",
+                     new String[] {  });
+
+    // รท 0001 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\n",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\n",
+                     new String[] {  });
+
+    // รท 0001 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u000B",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
+                     new String[] {  });
+
+    // รท 0001 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 0001 ร— 0308 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 0001 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 0001 ร— 0308 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 0001 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u003A",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
+                     new String[] {  });
+
+    // รท 0001 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u002C",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
+                     new String[] {  });
+
+    // รท 0001 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u002E",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
+                     new String[] {  });
+
+    // รท 0001 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 0001 ร— 0308 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 0001 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u005F",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
+                     new String[] {  });
+
+    // รท 0001 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 0001 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 0001 ร— 0308 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 0001 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\"",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\"",
+                     new String[] {  });
+
+    // รท 0001 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0027",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
+                     new String[] {  });
+
+    // รท 0001 รท 261D รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 0001 ร— 0308 รท 261D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 0001 รท 1F3FB รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 0001 ร— 0308 รท 1F3FB รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 0001 รท 2764 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 0001 ร— 0308 รท 2764 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 0001 รท 1F466 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 0001 ร— 0308 รท 1F466 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 0001 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u00AD",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 0001 × 0300 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0300",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 × 0300 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 0001 × 200D ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u200D",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 × 200D ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 0001 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 0001 × 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 0001 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 0001 ร— 0308 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 0001 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 0001 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 0001 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 0001 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // รท 0001 ร— 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0001 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0001",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0001",
+                     new String[] {  });
+
+    // รท 000D รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\r",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\r",
+                     new String[] {  });
+
+    // ÷ 000D × 000A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\n",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\n",
+                     new String[] {  });
+
+    // รท 000D รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u000B",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u000B",
+                     new String[] {  });
+
+    // รท 000D รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000D รท 0308 รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000D รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000D รท 0308 รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000D รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u003A",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u003A",
+                     new String[] {  });
+
+    // รท 000D รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u002C",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u002C",
+                     new String[] {  });
+
+    // รท 000D รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u002E",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u002E",
+                     new String[] {  });
+
+    // รท 000D รท 0030 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000D รท 0308 รท 0030 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000D รท 005F รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u005F",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 005F รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u005F",
+                     new String[] {  });
+
+    // รท 000D รท 1F1E6 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 1F1E6 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 000D รท 05D0 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 000D รท 0308 รท 05D0 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 000D รท 0022 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\"",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 0022 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\"",
+                     new String[] {  });
+
+    // รท 000D รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0027",
+                     new String[] {  });
+
+    // รท 000D รท 0308 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0027",
+                     new String[] {  });
+
+    // รท 000D รท 261D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 000D รท 0308 รท 261D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 000D รท 1F3FB รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 000D รท 0308 รท 1F3FB รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 000D รท 2764 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 000D รท 0308 รท 2764 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 000D รท 1F466 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 000D รท 0308 รท 1F466 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 000D รท 00AD รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u00AD",
+                     new String[] {  });
+
+    // รท 000D รท 0308 ร— 00AD รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
+                     new String[] {  });
+
+    // รท 000D รท 0300 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0300",
+                     new String[] {  });
+
+    // รท 000D รท 0308 ร— 0300 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0300",
+                     new String[] {  });
+
+    // รท 000D รท 200D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u200D",
+                     new String[] {  });
+
+    // รท 000D รท 0308 ร— 200D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u200D",
+                     new String[] {  });
+
+    // รท 000D รท 0061 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // รท 000D รท 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // รท 000D รท 0061 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0308 รท 0061 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0061 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0308 รท 0061 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0061 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0308 รท 0061 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 000D รท 0031 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0308 รท 0031 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0031 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0308 รท 0031 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0031 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0308 รท 0031 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // รท 000D รท 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0001 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0001",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 0001 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0001",
+                     new String[] {  });
+
+    // รท 000A รท 000D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\r",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 000D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\r",
+                     new String[] {  });
+
+    // รท 000A รท 000A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\n",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 000A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\n",
+                     new String[] {  });
+
+    // รท 000A รท 000B รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u000B",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 000B รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u000B",
+                     new String[] {  });
+
+    // รท 000A รท 3031 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000A รท 0308 รท 3031 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000A รท 0041 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000A รท 0308 รท 0041 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000A รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u003A",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u003A",
+                     new String[] {  });
+
+    // รท 000A รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u002C",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u002C",
+                     new String[] {  });
+
+    // รท 000A รท 002E รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u002E",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 002E รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u002E",
+                     new String[] {  });
+
+    // รท 000A รท 0030 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000A รท 0308 รท 0030 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000A รท 005F รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u005F",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 005F รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u005F",
+                     new String[] {  });
+
+    // รท 000A รท 1F1E6 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 1F1E6 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 000A รท 05D0 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 000A รท 0308 รท 05D0 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 000A รท 0022 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\"",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 0022 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\"",
+                     new String[] {  });
+
+    // รท 000A รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0027",
+                     new String[] {  });
+
+    // รท 000A รท 0308 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0027",
+                     new String[] {  });
+
+    // รท 000A รท 261D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 000A รท 0308 รท 261D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 000A รท 1F3FB รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 000A รท 0308 รท 1F3FB รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 000A รท 2764 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 000A รท 0308 รท 2764 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 000A รท 1F466 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 000A รท 0308 รท 1F466 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 000A รท 00AD รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u00AD",
+                     new String[] {  });
+
+    // รท 000A รท 0308 ร— 00AD รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
+                     new String[] {  });
+
+    // รท 000A รท 0300 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0300",
+                     new String[] {  });
+
+    // รท 000A รท 0308 ร— 0300 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0300",
+                     new String[] {  });
+
+    // รท 000A รท 200D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u200D",
+                     new String[] {  });
+
+    // รท 000A รท 0308 ร— 200D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000A ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 000A รท 0061 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 000A รท 0308 รท 0061 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 000A รท 0031 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 000A รท 0308 รท 0031 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 000A รท 0031 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 000A รท 0308 รท 0031 รท 0027 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 000A รท 0031 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 000A รท 0308 รท 0031 รท 002C รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 000A รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // รท 000A รท 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0001",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 0001 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\r",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 000D รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\r",
+                     new String[] {  });
+
+    // รท 000B รท 000A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\n",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 000A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\n",
+                     new String[] {  });
+
+    // รท 000B รท 000B รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u000B",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 000B รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
+                     new String[] {  });
+
+    // รท 000B รท 3031 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000B รท 0308 รท 3031 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 000B รท 0041 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000B รท 0308 รท 0041 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 000B รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u003A",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
+                     new String[] {  });
+
+    // รท 000B รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u002C",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
+                     new String[] {  });
+
+    // รท 000B รท 002E รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u002E",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 002E รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
+                     new String[] {  });
+
+    // รท 000B รท 0030 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000B รท 0308 รท 0030 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 000B รท 005F รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u005F",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 005F รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
+                     new String[] {  });
+
+    // รท 000B รท 1F1E6 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 1F1E6 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 000B รท 05D0 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 000B รท 0308 รท 05D0 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 000B รท 0022 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\"",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 0022 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\"",
+                     new String[] {  });
+
+    // รท 000B รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0027",
+                     new String[] {  });
+
+    // รท 000B รท 0308 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
+                     new String[] {  });
+
+    // รท 000B รท 261D รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 000B รท 0308 รท 261D รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 000B รท 1F3FB รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 000B รท 0308 รท 1F3FB รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 000B รท 2764 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 000B รท 0308 รท 2764 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 000B รท 1F466 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 000B รท 0308 รท 1F466 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 000B รท 00AD รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u00AD",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
+                     new String[] {  });
+
+    // รท 000B รท 0300 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0300",
+                     new String[] {  });
+
+    // รท 000B รท 0308 ร— 0300 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
+                     new String[] {  });
+
+    // รท 000B รท 200D รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u200D",
+                     new String[] {  });
+
+    // รท 000B รท 0308 ร— 200D รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // รท 000B รท 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // รท 000B รท 0061 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0308 รท 0061 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0061 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0308 รท 0061 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0061 รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0308 รท 0061 รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // รท 000B รท 0031 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 000B รท 0308 รท 0031 รท 003A รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // รท 000B รท 0031 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 000B รท 0308 รท 0031 รท 0027 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // รท 000B รท 0031 รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 000B รท 0308 รท 0031 รท 002C รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // รท 000B รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // รท 000B รท 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <LINE TABULATION> (Newline) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 3031 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0001",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 000D รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\r",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 000D รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\r",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 000A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\n",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 000A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\n",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 000B รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u000B",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 000B รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u3031",
+                     new String[] { "\u3031\u3031" });
+
+    // ÷ 3031 × 0308 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
+                     new String[] { "\u3031\u0308\u3031" });
+
+    // รท 3031 รท 0041 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0041",
+                     new String[] { "\u3031", "\u0041" });
+
+    // รท 3031 ร— 0308 รท 0041 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
+                     new String[] { "\u3031\u0308", "\u0041" });
+
+    // รท 3031 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u003A",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u002C",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 002E รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u002E",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 002E รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 0030 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0030",
+                     new String[] { "\u3031", "\u0030" });
+
+    // รท 3031 ร— 0308 รท 0030 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
+                     new String[] { "\u3031\u0308", "\u0030" });
+
+    // ÷ 3031 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u005F",
+                     new String[] { "\u3031\u005F" });
+
+    // ÷ 3031 × 0308 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
+                     new String[] { "\u3031\u0308\u005F" });
+
+    // รท 3031 รท 1F1E6 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 1F1E6 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 05D0 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u05D0",
+                     new String[] { "\u3031", "\u05D0" });
+
+    // รท 3031 ร— 0308 รท 05D0 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
+                     new String[] { "\u3031\u0308", "\u05D0" });
+
+    // รท 3031 รท 0022 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\"",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 0022 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\"",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0027",
+                     new String[] { "\u3031" });
+
+    // รท 3031 ร— 0308 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
+                     new String[] { "\u3031\u0308" });
+
+    // รท 3031 รท 261D รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u261D",
+                     new String[] { "\u3031", "\u261D" });
+
+    // รท 3031 ร— 0308 รท 261D รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u261D",
+                     new String[] { "\u3031\u0308", "\u261D" });
+
+    // รท 3031 รท 1F3FB รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDFFB",
+                     new String[] { "\u3031", "\uD83C\uDFFB" });
+
+    // รท 3031 ร— 0308 รท 1F3FB รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDFFB",
+                     new String[] { "\u3031\u0308", "\uD83C\uDFFB" });
+
+    // รท 3031 รท 2764 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u2764",
+                     new String[] { "\u3031", "\u2764" });
+
+    // รท 3031 ร— 0308 รท 2764 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u2764",
+                     new String[] { "\u3031\u0308", "\u2764" });
+
+    // รท 3031 รท 1F466 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83D\uDC66",
+                     new String[] { "\u3031", "\uD83D\uDC66" });
+
+    // รท 3031 ร— 0308 รท 1F466 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83D\uDC66",
+                     new String[] { "\u3031\u0308", "\uD83D\uDC66" });
+
+    // ÷ 3031 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u00AD",
+                     new String[] { "\u3031\u00AD" });
+
+    // รท 3031 ร— 0308 ร— 00AD รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
+                     new String[] { "\u3031\u0308\u00AD" });
+
+    // รท 3031 ร— 0300 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0300",
+                     new String[] { "\u3031\u0300" });
+
+    // รท 3031 ร— 0308 ร— 0300 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
+                     new String[] { "\u3031\u0308\u0300" });
+
+    // ÷ 3031 × 200D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u200D",
+                     new String[] { "\u3031\u200D" });
+
+    // ÷ 3031 × 0308 × 200D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u200D",
+                     new String[] { "\u3031\u0308\u200D" });
+
+    // ÷ 3031 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
+                     new String[] { "\u3031", "\u0061\u2060" });
+
+    // ÷ 3031 × 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
+                     new String[] { "\u3031\u0308", "\u0061\u2060" });
+
+    // ÷ 3031 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
+                     new String[] { "\u3031", "\u0061" });
+
+    // ÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // ÷ 3031 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 3031 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 3031 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
+                     new String[] { "\u3031", "\u0031" });
+
+    // ÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // ÷ 0041 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0001",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\r",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\r",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\n",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\n",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u000B",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u3031",
+                     new String[] { "\u0041", "\u3031" });
+
+    // ÷ 0041 × 0308 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
+                     new String[] { "\u0041\u0308", "\u3031" });
+
+    // ÷ 0041 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0041",
+                     new String[] { "\u0041\u0041" });
+
+    // ÷ 0041 × 0308 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
+                     new String[] { "\u0041\u0308\u0041" });
+
+    // ÷ 0041 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u003A",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u002C",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u002E",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0030",
+                     new String[] { "\u0041\u0030" });
+
+    // ÷ 0041 × 0308 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
+                     new String[] { "\u0041\u0308\u0030" });
+
+    // ÷ 0041 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u005F",
+                     new String[] { "\u0041\u005F" });
+
+    // ÷ 0041 × 0308 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
+                     new String[] { "\u0041\u0308\u005F" });
+
+    // ÷ 0041 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u05D0",
+                     new String[] { "\u0041\u05D0" });
+
+    // ÷ 0041 × 0308 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
+                     new String[] { "\u0041\u0308\u05D0" });
+
+    // ÷ 0041 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\"",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\"",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0027",
+                     new String[] { "\u0041" });
+
+    // ÷ 0041 × 0308 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
+                     new String[] { "\u0041\u0308" });
+
+    // ÷ 0041 ÷ 261D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u261D",
+                     new String[] { "\u0041", "\u261D" });
+
+    // ÷ 0041 × 0308 ÷ 261D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u261D",
+                     new String[] { "\u0041\u0308", "\u261D" });
+
+    // ÷ 0041 ÷ 1F3FB ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDFFB",
+                     new String[] { "\u0041", "\uD83C\uDFFB" });
+
+    // ÷ 0041 × 0308 ÷ 1F3FB ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDFFB",
+                     new String[] { "\u0041\u0308", "\uD83C\uDFFB" });
+
+    // ÷ 0041 ÷ 2764 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u2764",
+                     new String[] { "\u0041", "\u2764" });
+
+    // ÷ 0041 × 0308 ÷ 2764 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u2764",
+                     new String[] { "\u0041\u0308", "\u2764" });
+
+    // ÷ 0041 ÷ 1F466 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83D\uDC66",
+                     new String[] { "\u0041", "\uD83D\uDC66" });
+
+    // ÷ 0041 × 0308 ÷ 1F466 ÷  #  ÷ [0.2] LATIN CAPITA

<TRUNCATED>

[21/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
index 8b288c2..a2ad394 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
@@ -65,147 +66,212 @@ public final class StandardTokenizerImpl {
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\42\0\1\15\4\0\1\14\4\0\1\7\1\0\1\10\1\0\12\4"+
-    "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\57\0\1\1"+
-    "\2\0\1\3\7\0\1\1\1\0\1\6\2\0\1\1\5\0\27\1"+
-    "\1\0\37\1\1\0\u01ca\1\4\0\14\1\5\0\1\6\10\0\5\1"+
-    "\7\0\1\1\1\0\1\1\21\0\160\3\5\1\1\0\2\1\2\0"+
-    "\4\1\1\7\7\0\1\1\1\6\3\1\1\0\1\1\1\0\24\1"+
-    "\1\0\123\1\1\0\213\1\1\0\7\3\236\1\11\0\46\1\2\0"+
-    "\1\1\7\0\47\1\1\0\1\7\7\0\55\3\1\0\1\3\1\0"+
-    "\2\3\1\0\2\3\1\0\1\3\10\0\33\16\5\0\3\16\1\1"+
-    "\1\6\13\0\5\3\7\0\2\7\2\0\13\3\1\0\1\3\3\0"+
-    "\53\1\25\3\12\4\1\0\1\4\1\7\1\0\2\1\1\3\143\1"+
-    "\1\0\1\1\10\3\1\0\6\3\2\1\2\3\1\0\4\3\2\1"+
-    "\12\4\3\1\2\0\1\1\17\0\1\3\1\1\1\3\36\1\33\3"+
-    "\2\0\131\1\13\3\1\1\16\0\12\4\41\1\11\3\2\1\2\0"+
-    "\1\7\1\0\1\1\5\0\26\1\4\3\1\1\11\3\1\1\3\3"+
-    "\1\1\5\3\22\0\31\1\3\3\104\0\1\1\1\0\13\1\67\0"+
-    "\33\3\1\0\4\3\66\1\3\3\1\1\22\3\1\1\7\3\12\1"+
-    "\2\3\2\0\12\4\1\0\7\1\1\0\7\1\1\0\3\3\1\0"+
-    "\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0"+
-    "\4\1\2\0\1\3\1\1\7\3\2\0\2\3\2\0\3\3\1\1"+
-    "\10\0\1\3\4\0\2\1\1\0\3\1\2\3\2\0\12\4\2\1"+
-    "\17\0\3\3\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1"+
-    "\1\0\2\1\1\0\2\1\1\0\2\1\2\0\1\3\1\0\5\3"+
-    "\4\0\2\3\2\0\3\3\3\0\1\3\7\0\4\1\1\0\1\1"+
-    "\7\0\12\4\2\3\3\1\1\3\13\0\3\3\1\0\11\1\1\0"+
-    "\3\1\1\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0"+
-    "\1\3\1\1\10\3\1\0\3\3\1\0\3\3\2\0\1\1\17\0"+
-    "\2\1\2\3\2\0\12\4\21\0\3\3\1\0\10\1\2\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\3"+
-    "\1\1\7\3\2\0\2\3\2\0\3\3\10\0\2\3\4\0\2\1"+
-    "\1\0\3\1\2\3\2\0\12\4\1\0\1\1\20\0\1\3\1\1"+
-    "\1\0\6\1\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1"+
-    "\1\0\2\1\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\3"+
-    "\3\0\3\3\1\0\4\3\2\0\1\1\6\0\1\3\16\0\12\4"+
-    "\21\0\3\3\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1"+
-    "\1\0\5\1\3\0\1\1\7\3\1\0\3\3\1\0\4\3\7\0"+
-    "\2\3\1\0\2\1\6\0\2\1\2\3\2\0\12\4\22\0\2\3"+
-    "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+
-    "\2\0\1\3\1\1\7\3\1\0\3\3\1\0\4\3\7\0\2\3"+
-    "\7\0\1\1\1\0\2\1\2\3\2\0\12\4\1\0\2\1\17\0"+
-    "\2\3\1\0\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\3"+
-    "\1\0\3\3\1\0\4\3\1\1\10\0\1\3\10\0\2\1\2\3"+
-    "\2\0\12\4\12\0\6\1\2\0\2\3\1\0\22\1\3\0\30\1"+
-    "\1\0\11\1\1\0\1\1\2\0\7\1\3\0\1\3\4\0\6\3"+
-    "\1\0\1\3\1\0\10\3\22\0\2\3\15\0\60\20\1\21\2\20"+
-    "\7\21\5\0\7\20\10\21\1\0\12\4\47\0\2\20\1\0\1\20"+
-    "\2\0\2\20\1\0\1\20\2\0\1\20\6\0\4\20\1\0\7\20"+
-    "\1\0\3\20\1\0\1\20\1\0\1\20\2\0\2\20\1\0\4\20"+
-    "\1\21\2\20\6\21\1\0\2\21\1\20\2\0\5\20\1\0\1\20"+
-    "\1\0\6\21\2\0\12\4\2\0\4\20\40\0\1\1\27\0\2\3"+
-    "\6\0\12\4\13\0\1\3\1\0\1\3\1\0\1\3\4\0\2\3"+
-    "\10\1\1\0\44\1\4\0\24\3\1\0\2\3\5\1\13\3\1\0"+
-    "\44\3\11\0\1\3\71\0\53\20\24\21\1\20\12\4\6\0\6\20"+
-    "\4\21\4\20\3\21\1\20\3\21\2\20\7\21\3\20\4\21\15\20"+
-    "\14\21\1\20\1\21\12\4\4\21\2\20\46\1\1\0\1\1\5\0"+
-    "\1\1\2\0\53\1\1\0\4\1\u0100\2\111\1\1\0\4\1\2\0"+
-    "\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0\4\1\2\0"+
-    "\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+
-    "\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0\3\3\40\0"+
-    "\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0\32\1\5\0"+
-    "\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\3\13\0\22\1"+
-    "\3\3\13\0\22\1\2\3\14\0\15\1\1\0\3\1\1\0\2\3"+
-    "\14\0\64\20\40\21\3\0\1\20\4\0\1\20\1\21\2\0\12\4"+
-    "\41\0\4\3\1\0\12\4\6\0\130\1\10\0\51\1\1\3\1\1"+
-    "\5\0\106\1\12\0\35\1\3\0\14\3\4\0\14\3\12\0\12\4"+
-    "\36\20\2\0\5\20\13\0\54\20\4\0\21\21\7\20\2\21\6\0"+
-    "\12\4\1\20\3\0\2\20\40\0\27\1\5\3\4\0\65\20\12\21"+
-    "\1\0\35\21\2\0\1\3\12\4\6\0\12\4\6\0\16\20\122\0"+
-    "\5\3\57\1\21\3\7\1\4\0\12\4\21\0\11\3\14\0\3\3"+
-    "\36\1\15\3\2\1\12\4\54\1\16\3\14\0\44\1\24\3\10\0"+
-    "\12\4\3\0\3\1\12\4\44\1\122\0\3\3\1\0\25\3\4\1"+
-    "\1\3\4\1\3\3\2\1\11\0\300\1\47\3\25\0\4\3\u0116\1"+
-    "\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1"+
-    "\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1"+
-    "\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1"+
-    "\4\0\15\1\5\0\3\1\1\0\7\1\17\0\4\3\10\0\2\10"+
-    "\12\0\1\10\2\0\1\6\2\0\5\3\20\0\2\11\3\0\1\7"+
-    "\17\0\1\11\13\0\5\3\1\0\12\3\1\0\1\1\15\0\1\1"+
-    "\20\0\15\1\63\0\41\3\21\0\1\1\4\0\1\1\2\0\12\1"+
-    "\1\0\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1"+
-    "\1\0\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1"+
-    "\21\0\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1"+
-    "\6\0\4\1\3\3\2\1\14\0\46\1\1\0\1\1\5\0\1\1"+
-    "\2\0\70\1\7\0\1\1\17\0\1\3\27\1\11\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\40\3\57\0\1\1\120\0\32\12\1\0"+
-    "\131\12\14\0\326\12\57\0\1\1\1\0\1\12\31\0\11\12\6\3"+
-    "\1\0\5\5\2\0\3\12\1\1\1\1\4\0\126\13\2\0\2\3"+
-    "\2\5\3\13\133\5\1\0\4\5\5\0\51\1\3\0\136\2\21\0"+
-    "\33\1\65\0\20\5\320\0\57\5\1\0\130\5\250\0\u19b6\12\112\0"+
-    "\u51cd\12\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\4"+
-    "\2\1\24\0\57\1\4\3\1\0\12\3\1\0\31\1\7\0\1\3"+
-    "\120\1\2\3\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
-    "\14\0\13\1\115\0\12\1\1\3\3\1\1\3\4\1\1\3\27\1"+
-    "\5\3\30\0\64\1\14\0\2\3\62\1\21\3\13\0\12\4\6\0"+
-    "\22\3\6\1\3\0\1\1\4\0\12\4\34\1\10\3\2\0\27\1"+
-    "\15\3\14\0\35\2\3\0\4\3\57\1\16\3\16\0\1\1\12\4"+
-    "\46\0\51\1\16\3\11\0\3\1\1\3\10\1\2\3\2\0\12\4"+
-    "\6\0\33\20\1\21\4\0\60\20\1\21\1\20\3\21\2\20\2\21"+
-    "\5\20\2\21\1\20\1\21\1\20\30\0\5\20\13\1\5\3\2\0"+
-    "\3\1\2\3\12\0\6\1\2\0\6\1\2\0\6\1\11\0\7\1"+
-    "\1\0\7\1\221\0\43\1\10\3\1\0\2\3\2\0\12\4\6\0"+
-    "\u2ba4\2\14\0\27\2\4\0\61\2\u2104\0\u016e\12\2\0\152\12\46\0"+
-    "\7\1\14\0\5\1\5\0\1\16\1\3\12\16\1\0\15\16\1\0"+
-    "\5\16\1\0\1\16\1\0\2\16\1\0\2\16\1\0\12\16\142\1"+
-    "\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\3"+
-    "\1\7\2\0\1\6\1\7\13\0\7\3\14\0\2\11\30\0\3\11"+
-    "\1\7\1\0\1\10\1\0\1\7\1\6\32\0\5\1\1\0\207\1"+
-    "\2\0\1\3\7\0\1\10\4\0\1\7\1\0\1\10\1\0\12\4"+
-    "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\13\0\70\5"+
-    "\2\3\37\2\3\0\6\2\2\0\6\2\2\0\6\2\2\0\3\2"+
-    "\34\0\3\3\4\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1"+
-    "\1\0\17\1\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\3"+
-    "\202\0\35\1\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1"+
-    "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\4"+
-    "\u0356\0\6\1\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1"+
-    "\2\0\27\1\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1"+
-    "\100\0\1\1\3\3\1\0\2\3\5\0\4\3\4\1\1\0\3\1"+
-    "\1\0\33\1\4\0\3\3\4\0\1\3\40\0\35\1\203\0\66\1"+
-    "\12\0\26\1\12\0\23\1\215\0\111\1\u03b7\0\3\3\65\1\17\3"+
-    "\37\0\12\4\20\0\3\3\55\1\13\3\2\0\1\3\22\0\31\1"+
-    "\7\0\12\4\6\0\3\3\44\1\16\3\1\0\12\4\100\0\3\3"+
-    "\60\1\16\3\4\1\13\0\12\4\u04a6\0\53\1\15\3\10\0\12\4"+
-    "\u0936\0\u036f\1\221\0\143\1\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1"+
-    "\13\0\1\1\56\3\20\0\4\3\15\1\u4060\0\1\5\1\13\u2163\0"+
-    "\5\3\3\0\26\3\2\0\7\3\36\0\4\3\224\0\3\3\u01bb\0"+
-    "\125\1\1\0\107\1\1\0\2\1\2\0\1\1\2\0\2\1\2\0"+
-    "\4\1\1\0\14\1\1\0\1\1\1\0\7\1\1\0\101\1\1\0"+
-    "\4\1\2\0\10\1\1\0\7\1\1\0\34\1\1\0\4\1\1\0"+
-    "\5\1\1\0\1\1\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0"+
-    "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
-    "\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0\10\1\2\0"+
-    "\62\4\u1600\0\4\1\1\0\33\1\1\0\2\1\1\0\1\1\2\0"+
-    "\1\1\1\0\12\1\1\0\4\1\1\0\1\1\1\0\1\1\6\0"+
-    "\1\1\4\0\1\1\1\0\1\1\1\0\1\1\1\0\3\1\1\0"+
-    "\2\1\1\0\1\1\2\0\1\1\1\0\1\1\1\0\1\1\1\0"+
-    "\1\1\1\0\1\1\1\0\2\1\1\0\1\1\2\0\4\1\1\0"+
-    "\7\1\1\0\4\1\1\0\4\1\1\0\1\1\1\0\12\1\1\0"+
-    "\21\1\5\0\3\1\1\0\5\1\1\0\21\1\u032a\0\32\17\1\13"+
-    "\u0dff\0\ua6d7\12\51\0\u1035\12\13\0\336\12\u3fe2\0\u021e\12\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
-    "\1\3\36\0\140\3\200\0\360\3\uffff\0\uffff\0\ufe12\0";
+    "\42\0\1\32\1\7\3\0\1\31\2\0\1\7\1\0\1\24\1\0"+
+    "\1\25\1\0\12\21\1\23\1\24\5\0\32\15\4\0\1\26\1\0"+
+    "\32\15\56\0\1\4\1\15\2\0\1\5\1\4\6\0\1\15\1\0"+
+    "\1\23\2\0\1\15\5\0\27\15\1\0\37\15\1\0\u01ca\15\4\0"+
+    "\14\15\5\0\1\23\10\0\5\15\7\0\1\15\1\0\1\15\21\0"+
+    "\160\5\5\15\1\0\2\15\2\0\4\15\1\24\1\15\6\0\1\15"+
+    "\1\23\3\15\1\0\1\15\1\0\24\15\1\0\123\15\1\0\213\15"+
+    "\1\0\7\5\246\15\1\0\46\15\2\0\1\15\7\0\47\15\1\0"+
+    "\1\24\7\0\55\5\1\0\1\5\1\0\2\5\1\0\2\5\1\0"+
+    "\1\5\10\0\33\33\5\0\3\33\1\15\1\23\13\0\6\5\6\0"+
+    "\2\24\2\0\13\5\1\0\1\5\3\0\53\15\25\5\12\20\1\0"+
+    "\1\20\1\24\1\0\2\15\1\5\143\15\1\0\1\15\10\5\1\0"+
+    "\6\5\2\15\2\5\1\0\4\5\2\15\12\20\3\15\2\0\1\15"+
+    "\17\0\1\5\1\15\1\5\36\15\33\5\2\0\131\15\13\5\1\15"+
+    "\16\0\12\20\41\15\11\5\2\15\2\0\1\24\1\0\1\15\5\0"+
+    "\26\15\4\5\1\15\11\5\1\15\3\5\1\15\5\5\22\0\31\15"+
+    "\3\5\104\0\25\15\1\0\10\15\26\0\60\5\66\15\3\5\1\15"+
+    "\22\5\1\15\7\5\12\15\2\5\2\0\12\20\1\0\20\15\3\5"+
+    "\1\0\10\15\2\0\2\15\2\0\26\15\1\0\7\15\1\0\1\15"+
+    "\3\0\4\15\2\0\1\5\1\15\7\5\2\0\2\5\2\0\3\5"+
+    "\1\15\10\0\1\5\4\0\2\15\1\0\3\15\2\5\2\0\12\20"+
+    "\2\15\17\0\3\5\1\0\6\15\4\0\2\15\2\0\26\15\1\0"+
+    "\7\15\1\0\2\15\1\0\2\15\1\0\2\15\2\0\1\5\1\0"+
+    "\5\5\4\0\2\5\2\0\3\5\3\0\1\5\7\0\4\15\1\0"+
+    "\1\15\7\0\12\20\2\5\3\15\1\5\13\0\3\5\1\0\11\15"+
+    "\1\0\3\15\1\0\26\15\1\0\7\15\1\0\2\15\1\0\5\15"+
+    "\2\0\1\5\1\15\10\5\1\0\3\5\1\0\3\5\2\0\1\15"+
+    "\17\0\2\15\2\5\2\0\12\20\11\0\1\15\7\0\3\5\1\0"+
+    "\10\15\2\0\2\15\2\0\26\15\1\0\7\15\1\0\2\15\1\0"+
+    "\5\15\2\0\1\5\1\15\7\5\2\0\2\5\2\0\3\5\10\0"+
+    "\2\5\4\0\2\15\1\0\3\15\2\5\2\0\12\20\1\0\1\15"+
+    "\20\0\1\5\1\15\1\0\6\15\3\0\3\15\1\0\4\15\3\0"+
+    "\2\15\1\0\1\15\1\0\2\15\3\0\2\15\3\0\3\15\3\0"+
+    "\14\15\4\0\5\5\3\0\3\5\1\0\4\5\2\0\1\15\6\0"+
+    "\1\5\16\0\12\20\20\0\4\5\1\0\10\15\1\0\3\15\1\0"+
+    "\27\15\1\0\20\15\3\0\1\15\7\5\1\0\3\5\1\0\4\5"+
+    "\7\0\2\5\1\0\3\15\5\0\2\15\2\5\2\0\12\20\20\0"+
+    "\1\15\3\5\1\0\10\15\1\0\3\15\1\0\27\15\1\0\12\15"+
+    "\1\0\5\15\2\0\1\5\1\15\7\5\1\0\3\5\1\0\4\5"+
+    "\7\0\2\5\7\0\1\15\1\0\2\15\2\5\2\0\12\20\1\0"+
+    "\2\15\16\0\3\5\1\0\10\15\1\0\3\15\1\0\51\15\2\0"+
+    "\1\15\7\5\1\0\3\5\1\0\4\5\1\15\5\0\3\15\1\5"+
+    "\7\0\3\15\2\5\2\0\12\20\12\0\6\15\2\0\2\5\1\0"+
+    "\22\15\3\0\30\15\1\0\11\15\1\0\1\15\2\0\7\15\3\0"+
+    "\1\5\4\0\6\5\1\0\1\5\1\0\10\5\6\0\12\20\2\0"+
+    "\2\5\15\0\60\34\1\35\2\34\7\35\5\0\7\34\10\35\1\0"+
+    "\12\20\47\0\2\34\1\0\1\34\2\0\2\34\1\0\1\34\2\0"+
+    "\1\34\6\0\4\34\1\0\7\34\1\0\3\34\1\0\1\34\1\0"+
+    "\1\34\2\0\2\34\1\0\4\34\1\35\2\34\6\35\1\0\2\35"+
+    "\1\34\2\0\5\34\1\0\1\34\1\0\6\35\2\0\12\20\2\0"+
+    "\4\34\40\0\1\15\27\0\2\5\6\0\12\20\13\0\1\5\1\0"+
+    "\1\5\1\0\1\5\4\0\2\5\10\15\1\0\44\15\4\0\24\5"+
+    "\1\0\2\5\5\15\13\5\1\0\44\5\11\0\1\5\71\0\53\34"+
+    "\24\35\1\34\12\20\6\0\6\34\4\35\4\34\3\35\1\34\3\35"+
+    "\2\34\7\35\3\34\4\35\15\34\14\35\1\34\1\35\12\20\4\35"+
+    "\2\34\46\15\1\0\1\15\5\0\1\15\2\0\53\15\1\0\4\15"+
+    "\u0100\17\111\15\1\0\4\15\2\0\7\15\1\0\1\15\1\0\4\15"+
+    "\2\0\51\15\1\0\4\15\2\0\41\15\1\0\4\15\2\0\7\15"+
+    "\1\0\1\15\1\0\4\15\2\0\17\15\1\0\71\15\1\0\4\15"+
+    "\2\0\103\15\2\0\3\5\40\0\20\15\20\0\126\15\2\0\6\15"+
+    "\3\0\u026c\15\2\0\21\15\1\0\32\15\5\0\113\15\3\0\13\15"+
+    "\7\0\15\15\1\0\4\15\3\5\13\0\22\15\3\5\13\0\22\15"+
+    "\2\5\14\0\15\15\1\0\3\15\1\0\2\5\14\0\64\34\40\35"+
+    "\3\0\1\34\4\0\1\34\1\35\2\0\12\20\41\0\4\5\1\0"+
+    "\12\20\6\0\130\15\10\0\5\15\2\5\42\15\1\5\1\15\5\0"+
+    "\106\15\12\0\37\15\1\0\14\5\4\0\14\5\12\0\12\20\36\34"+
+    "\2\0\5\34\13\0\54\34\4\0\32\34\6\0\12\20\1\34\3\0"+
+    "\2\34\40\0\27\15\5\5\4\0\65\34\12\35\1\0\35\35\2\0"+
+    "\1\5\12\20\6\0\12\20\6\0\16\34\2\0\17\5\101\0\5\5"+
+    "\57\15\21\5\7\15\4\0\12\20\21\0\11\5\14\0\3\5\36\15"+
+    "\15\5\2\15\12\20\54\15\16\5\14\0\44\15\24\5\10\0\12\20"+
+    "\3\0\3\15\12\20\44\15\2\0\11\15\107\0\3\5\1\0\25\5"+
+    "\4\15\1\5\4\15\3\5\2\15\1\0\2\5\6\0\300\15\66\5"+
+    "\5\0\5\5\u0116\15\2\0\6\15\2\0\46\15\2\0\6\15\2\0"+
+    "\10\15\1\0\1\15\1\0\1\15\1\0\1\15\1\0\37\15\2\0"+
+    "\65\15\1\0\7\15\1\0\1\15\3\0\3\15\1\0\7\15\3\0"+
+    "\4\15\2\0\6\15\4\0\15\15\5\0\3\15\1\0\7\15\17\0"+
+    "\1\5\1\12\2\5\10\0\2\25\12\0\1\25\2\0\1\23\2\0"+
+    "\5\5\1\26\14\0\1\4\2\0\2\26\3\0\1\24\4\0\1\4"+
+    "\12\0\1\26\13\0\5\5\1\0\12\5\1\0\1\15\15\0\1\15"+
+    "\20\0\15\15\63\0\23\5\1\10\15\5\21\0\1\15\4\0\1\15"+
+    "\2\0\12\15\1\0\1\15\3\0\5\15\4\0\1\4\1\0\1\15"+
+    "\1\0\1\15\1\0\1\15\1\0\4\15\1\0\12\15\1\16\2\0"+
+    "\4\15\5\0\5\15\4\0\1\15\21\0\51\15\13\0\6\4\17\0"+
+    "\2\4\u016f\0\2\4\14\0\1\4\137\0\1\4\106\0\1\4\31\0"+
+    "\13\4\4\0\3\4\273\0\14\15\1\16\47\15\300\0\2\4\12\0"+
+    "\1\4\11\0\1\4\72\0\4\4\1\0\5\4\1\4\1\0\7\4"+
+    "\1\4\2\4\1\4\1\4\1\0\2\4\2\4\1\4\4\4\1\3"+
+    "\2\4\1\4\1\4\2\4\2\4\1\4\3\4\1\4\3\4\2\4"+
+    "\10\4\3\4\5\4\1\4\1\4\1\4\5\4\14\4\13\4\2\4"+
+    "\2\4\1\4\1\4\2\4\1\4\1\4\22\4\1\4\2\4\2\4"+
+    "\6\4\12\0\2\4\6\4\1\4\1\4\1\4\2\4\3\4\2\4"+
+    "\10\4\2\4\4\4\2\4\13\4\2\4\5\4\2\4\2\4\1\4"+
+    "\5\4\2\4\1\4\1\4\1\4\2\4\24\4\2\4\5\4\6\4"+
+    "\1\4\2\4\1\3\1\4\2\4\1\4\4\4\1\4\2\4\1\4"+
+    "\2\0\2\4\4\3\1\4\1\4\2\4\1\4\1\0\1\4\1\0"+
+    "\1\4\6\0\1\4\3\0\1\4\6\0\1\4\12\0\2\4\17\0"+
+    "\1\4\2\0\1\4\4\0\1\4\1\0\1\4\4\0\3\4\1\0"+
+    "\1\4\13\0\2\4\3\4\55\0\3\4\11\0\1\4\16\0\1\4"+
+    "\16\0\1\4\u0174\0\2\4\u01cf\0\3\4\23\0\2\4\63\0\1\4"+
+    "\4\0\1\4\252\0\57\15\1\0\57\15\1\0\205\15\6\0\4\15"+
+    "\3\5\2\15\14\0\46\15\1\0\1\15\5\0\1\15\2\0\70\15"+
+    "\7\0\1\15\17\0\1\5\27\15\11\0\7\15\1\0\7\15\1\0"+
+    "\7\15\1\0\7\15\1\0\7\15\1\0\7\15\1\0\7\15\1\0"+
+    "\7\15\1\0\40\5\57\0\1\15\120\0\32\27\1\0\131\27\14\0"+
+    "\326\27\57\0\1\15\1\0\1\27\31\0\11\27\6\5\1\4\5\22"+
+    "\2\0\3\27\1\15\1\15\1\4\3\0\126\30\2\0\2\5\2\22"+
+    "\3\30\133\22\1\0\4\22\5\0\51\15\3\0\136\17\21\0\33\15"+
+    "\65\0\20\22\227\0\1\4\1\0\1\4\66\0\57\22\1\0\130\22"+
+    "\250\0\u19b6\27\112\0\u51d6\27\52\0\u048d\15\103\0\56\15\2\0\u010d\15"+
+    "\3\0\20\15\12\20\2\15\24\0\57\15\4\5\1\0\12\5\1\0"+
+    "\37\15\2\5\120\15\2\5\45\0\11\15\2\0\147\15\2\0\44\15"+
+    "\1\0\10\15\77\0\13\15\1\5\3\15\1\5\4\15\1\5\27\15"+
+    "\5\5\30\0\64\15\14\0\2\5\62\15\22\5\12\0\12\20\6\0"+
+    "\22\5\6\15\3\0\1\15\1\0\1\15\2\0\12\20\34\15\10\5"+
+    "\2\0\27\15\15\5\14\0\35\17\3\0\4\5\57\15\16\5\16\0"+
+    "\1\15\12\20\6\0\5\34\1\35\12\34\12\20\5\34\1\0\51\15"+
+    "\16\5\11\0\3\15\1\5\10\15\2\5\2\0\12\20\6\0\33\34"+
+    "\3\35\62\34\1\35\1\34\3\35\2\34\2\35\5\34\2\35\1\34"+
+    "\1\35\1\34\30\0\5\34\13\15\5\5\2\0\3\15\2\5\12\0"+
+    "\6\15\2\0\6\15\2\0\6\15\11\0\7\15\1\0\7\15\1\0"+
+    "\53\15\1\0\12\15\12\0\163\15\10\5\1\0\2\5\2\0\12\20"+
+    "\6\0\u2ba4\17\14\0\27\17\4\0\61\17\u2104\0\u016e\27\2\0\152\27"+
+    "\46\0\7\15\14\0\5\15\5\0\1\33\1\5\12\33\1\0\15\33"+
+    "\1\0\5\33\1\0\1\33\1\0\2\33\1\0\2\33\1\0\12\33"+
+    "\142\15\41\0\u016b\15\22\0\100\15\2\0\66\15\50\0\14\15\4\0"+
+    "\16\5\1\6\1\11\1\24\2\0\1\23\1\24\13\0\20\5\3\0"+
+    "\2\26\30\0\3\26\1\24\1\0\1\25\1\0\1\24\1\23\32\0"+
+    "\5\15\1\0\207\15\2\0\1\5\7\0\1\25\4\0\1\24\1\0"+
+    "\1\25\1\0\12\20\1\23\1\24\5\0\32\15\4\0\1\26\1\0"+
+    "\32\15\13\0\70\22\2\5\37\17\3\0\6\17\2\0\6\17\2\0"+
+    "\6\17\2\0\3\17\34\0\3\5\4\0\14\15\1\0\32\15\1\0"+
+    "\23\15\1\0\2\15\1\0\17\15\2\0\16\15\42\0\173\15\105\0"+
+    "\65\15\210\0\1\5\202\0\35\15\3\0\61\15\17\0\1\5\37\0"+
+    "\40\15\20\0\33\15\5\0\46\15\5\5\5\0\36\15\2\0\44\15"+
+    "\4\0\10\15\1\0\5\15\52\0\236\15\2\0\12\20\6\0\44\15"+
+    "\4\0\44\15\4\0\50\15\10\0\64\15\234\0\u0137\15\11\0\26\15"+
+    "\12\0\10\15\230\0\6\15\2\0\1\15\1\0\54\15\1\0\2\15"+
+    "\3\0\1\15\2\0\27\15\12\0\27\15\11\0\37\15\101\0\23\15"+
+    "\1\0\2\15\12\0\26\15\12\0\32\15\106\0\70\15\6\0\2\15"+
+    "\100\0\1\15\3\5\1\0\2\5\5\0\4\5\4\15\1\0\3\15"+
+    "\1\0\33\15\4\0\3\5\4\0\1\5\40\0\35\15\3\0\35\15"+
+    "\43\0\10\15\1\0\34\15\2\5\31\0\66\15\12\0\26\15\12\0"+
+    "\23\15\15\0\22\15\156\0\111\15\67\0\63\15\15\0\63\15\u030d\0"+
+    "\3\5\65\15\17\5\37\0\12\20\17\0\4\5\55\15\13\5\2\0"+
+    "\1\5\22\0\31\15\7\0\12\20\6\0\3\5\44\15\16\5\1\0"+
+    "\12\20\20\0\43\15\1\5\2\0\1\15\11\0\3\5\60\15\16\5"+
+    "\4\15\5\0\3\5\3\0\12\20\1\15\1\0\1\15\43\0\22\15"+
+    "\1\0\31\15\14\5\6\0\1\5\101\0\7\15\1\0\1\15\1\0"+
+    "\4\15\1\0\17\15\1\0\12\15\7\0\57\15\14\5\5\0\12\20"+
+    "\6\0\4\5\1\0\10\15\2\0\2\15\2\0\26\15\1\0\7\15"+
+    "\1\0\2\15\1\0\5\15\2\0\1\5\1\15\7\5\2\0\2\5"+
+    "\2\0\3\5\2\0\1\15\6\0\1\5\5\0\5\15\2\5\2\0"+
+    "\7\5\3\0\5\5\213\0\65\15\22\5\4\15\5\0\12\20\46\0"+
+    "\60\15\24\5\2\15\1\0\1\15\10\0\12\20\246\0\57\15\7\5"+
+    "\2\0\11\5\27\0\4\15\2\5\42\0\60\15\21\5\3\0\1\15"+
+    "\13\0\12\20\46\0\53\15\15\5\10\0\12\20\66\0\32\34\3\0"+
+    "\17\35\4\0\12\20\2\34\3\0\1\34\u0160\0\100\15\12\20\25\0"+
+    "\1\15\u01c0\0\71\15\u0107\0\11\15\1\0\45\15\10\5\1\0\10\5"+
+    "\1\15\17\0\12\20\30\0\36\15\2\0\26\5\1\0\16\5\u0349\0"+
+    "\u039a\15\146\0\157\15\21\0\304\15\u0abc\0\u042f\15\u0fd1\0\u0247\15\u21b9\0"+
+    "\u0239\15\7\0\37\15\1\0\12\20\146\0\36\15\2\0\5\5\13\0"+
+    "\60\15\7\5\11\0\4\15\14\0\12\20\11\0\25\15\5\0\23\15"+
+    "\u0370\0\105\15\13\0\1\15\56\5\20\0\4\5\15\15\100\0\1\15"+
+    "\u401f\0\1\22\1\30\u0bfe\0\153\15\5\0\15\15\3\0\11\15\7\0"+
+    "\12\15\3\0\2\5\1\0\4\5\u14c1\0\5\5\3\0\26\5\2\0"+
+    "\7\5\36\0\4\5\224\0\3\5\u01bb\0\125\15\1\0\107\15\1\0"+
+    "\2\15\2\0\1\15\2\0\2\15\2\0\4\15\1\0\14\15\1\0"+
+    "\1\15\1\0\7\15\1\0\101\15\1\0\4\15\2\0\10\15\1\0"+
+    "\7\15\1\0\34\15\1\0\4\15\1\0\5\15\1\0\1\15\3\0"+
+    "\7\15\1\0\u0154\15\2\0\31\15\1\0\31\15\1\0\37\15\1\0"+
+    "\31\15\1\0\37\15\1\0\31\15\1\0\37\15\1\0\31\15\1\0"+
+    "\37\15\1\0\31\15\1\0\10\15\2\0\62\20\u0200\0\67\5\4\0"+
+    "\62\5\10\0\1\5\16\0\1\5\26\0\5\5\1\0\17\5\u0550\0"+
+    "\7\5\1\0\21\5\2\0\7\5\1\0\2\5\1\0\5\5\u07d5\0"+
+    "\305\15\13\0\7\5\51\0\104\15\7\5\5\0\12\20\u04a6\0\4\15"+
+    "\1\0\33\15\1\0\2\15\1\0\1\15\2\0\1\15\1\0\12\15"+
+    "\1\0\4\15\1\0\1\15\1\0\1\15\6\0\1\15\4\0\1\15"+
+    "\1\0\1\15\1\0\1\15\1\0\3\15\1\0\2\15\1\0\1\15"+
+    "\2\0\1\15\1\0\1\15\1\0\1\15\1\0\1\15\1\0\1\15"+
+    "\1\0\2\15\1\0\1\15\2\0\4\15\1\0\7\15\1\0\4\15"+
+    "\1\0\4\15\1\0\1\15\1\0\12\15\1\0\21\15\5\0\3\15"+
+    "\1\0\5\15\1\0\21\15\u0144\0\4\4\1\4\312\4\1\4\60\4"+
+    "\15\0\3\4\37\0\1\4\32\15\6\0\32\15\2\0\4\4\2\16"+
+    "\14\15\2\16\12\15\4\0\1\4\2\0\12\4\22\0\71\4\32\1"+
+    "\1\30\2\4\15\4\12\0\1\4\24\0\1\4\2\0\11\4\1\0"+
+    "\4\4\11\0\7\4\2\4\256\4\42\4\2\4\141\4\1\3\16\4"+
+    "\2\4\2\4\1\4\3\4\2\4\44\4\3\3\2\4\1\3\2\4"+
+    "\3\3\44\4\2\4\3\4\1\4\4\4\5\2\102\4\2\3\2\4"+
+    "\13\3\25\4\4\3\4\4\1\3\1\4\11\3\3\4\1\3\4\4"+
+    "\3\3\1\4\3\3\42\4\1\3\123\4\1\4\77\4\10\0\3\4"+
+    "\6\4\1\4\30\4\7\4\2\4\2\4\1\4\2\3\4\4\1\3"+
+    "\14\4\1\4\2\4\4\4\2\4\1\3\4\4\2\3\15\4\2\4"+
+    "\2\4\1\4\10\4\2\4\11\4\1\4\5\4\3\4\14\4\3\4"+
+    "\10\4\3\4\2\4\1\4\1\4\1\4\4\4\1\4\6\4\1\4"+
+    "\3\4\1\4\6\4\113\4\3\3\3\4\5\3\60\0\43\4\1\3"+
+    "\20\4\3\3\11\4\1\3\5\4\5\4\1\4\1\3\6\4\15\4"+
+    "\6\4\3\4\1\4\1\4\2\4\3\4\1\4\2\4\7\4\6\4"+
+    "\164\0\14\4\125\0\53\4\14\0\4\4\70\0\10\4\12\0\6\4"+
+    "\50\0\10\4\36\0\122\4\14\0\4\4\10\4\5\3\1\4\2\3"+
+    "\6\4\1\3\11\4\12\3\1\4\1\0\1\4\2\3\1\4\6\4"+
+    "\1\0\52\4\2\4\4\4\3\4\1\4\1\4\47\4\15\4\5\4"+
+    "\2\3\1\4\2\3\6\4\3\4\15\4\1\4\15\3\42\4\u05fe\4"+
+    "\2\0\ua6d7\27\51\0\u1035\27\13\0\336\27\2\0\u1682\27\u295e\0\u021e\27"+
+    "\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
+    "\1\5\36\0\137\13\1\14\200\0\360\5\uffff\0\uffff\0\ufe12\0";
 
   /** 
    * Translates characters to character classes
@@ -218,12 +284,15 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
-    "\1\7\1\2\1\1\1\10\1\2\1\0\1\2\1\0"+
-    "\1\4\1\0\2\2\2\0\1\1\1\0";
+    "\1\0\2\1\3\2\2\1\1\3\1\2\1\4\2\5"+
+    "\1\6\1\1\1\7\1\10\1\3\1\11\1\2\1\0"+
+    "\4\2\1\0\1\2\2\0\1\3\1\0\1\3\2\2"+
+    "\1\0\1\5\1\2\1\5\1\0\2\3\1\0\2\2"+
+    "\2\0\1\2\1\0\2\3\5\2\1\0\1\2\1\3"+
+    "\3\2";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -248,12 +317,17 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\22\0\44\0\66\0\110\0\132\0\154\0\176"+
-    "\0\220\0\242\0\264\0\306\0\330\0\352\0\374\0\u010e"+
-    "\0\u0120\0\154\0\u0132\0\u0144\0\u0156\0\264\0\u0168\0\u017a";
+    "\0\0\0\36\0\74\0\132\0\170\0\226\0\264\0\322"+
+    "\0\360\0\u010e\0\u012c\0\u014a\0\u0168\0\u0186\0\u01a4\0\u01c2"+
+    "\0\u01e0\0\u01fe\0\u021c\0\u023a\0\74\0\u0258\0\u0276\0\u0294"+
+    "\0\u02b2\0\264\0\u02d0\0\u02ee\0\322\0\u030c\0\u032a\0\u0348"+
+    "\0\u0366\0\u0384\0\u03a2\0\u03c0\0\u03de\0\u03fc\0\u01a4\0\u041a"+
+    "\0\u0438\0\u0456\0\u0474\0\u0492\0\u04b0\0\u04ce\0\u04ec\0\u050a"+
+    "\0\u0528\0\u0546\0\u0564\0\u0582\0\u05a0\0\u05be\0\u05dc\0\u05fa"+
+    "\0\36\0\u0618\0\360\0\u0636\0\u0654";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -276,33 +350,94 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_TRANS = zzUnpackTrans();
 
   private static final String ZZ_TRANS_PACKED_0 =
-    "\1\2\1\3\1\4\1\2\1\5\1\6\3\2\1\7"+
-    "\1\10\1\11\2\2\1\12\1\13\2\14\23\0\3\3"+
-    "\1\15\1\0\1\16\1\0\1\16\1\17\2\0\1\16"+
-    "\1\0\1\12\2\0\1\3\1\0\1\3\2\4\1\15"+
-    "\1\0\1\16\1\0\1\16\1\17\2\0\1\16\1\0"+
-    "\1\12\2\0\1\4\1\0\2\3\2\5\2\0\2\20"+
-    "\1\21\2\0\1\20\1\0\1\12\2\0\1\5\3\0"+
-    "\1\6\1\0\1\6\3\0\1\17\7\0\1\6\1\0"+
-    "\2\3\1\22\1\5\1\23\3\0\1\22\4\0\1\12"+
-    "\2\0\1\22\3\0\1\10\15\0\1\10\3\0\1\11"+
-    "\15\0\1\11\1\0\2\3\1\12\1\15\1\0\1\16"+
-    "\1\0\1\16\1\17\2\0\1\24\1\25\1\12\2\0"+
-    "\1\12\3\0\1\26\13\0\1\27\1\0\1\26\3\0"+
-    "\1\14\14\0\2\14\1\0\2\3\2\15\2\0\2\30"+
-    "\1\17\2\0\1\30\1\0\1\12\2\0\1\15\1\0"+
-    "\2\3\1\16\12\0\1\3\2\0\1\16\1\0\2\3"+
-    "\1\17\1\15\1\23\3\0\1\17\4\0\1\12\2\0"+
-    "\1\17\3\0\1\20\1\5\14\0\1\20\1\0\2\3"+
-    "\1\21\1\5\1\23\3\0\1\21\4\0\1\12\2\0"+
-    "\1\21\3\0\1\23\1\0\1\23\3\0\1\17\7\0"+
-    "\1\23\1\0\2\3\1\24\1\15\4\0\1\17\4\0"+
-    "\1\12\2\0\1\24\3\0\1\25\12\0\1\24\2\0"+
-    "\1\25\3\0\1\27\13\0\1\27\1\0\1\27\3\0"+
-    "\1\30\1\15\14\0\1\30";
+    "\1\2\1\3\1\4\1\5\1\6\2\2\1\7\2\2"+
+    "\1\10\2\2\1\11\1\12\1\13\1\14\1\15\1\16"+
+    "\3\2\1\17\1\20\1\21\2\2\1\22\2\23\37\0"+
+    "\1\24\3\0\2\25\1\0\5\25\20\0\1\25\5\0"+
+    "\1\4\2\0\1\4\1\0\1\26\2\4\20\0\1\4"+
+    "\2\0\1\4\2\0\1\5\2\0\1\5\1\27\1\30"+
+    "\2\5\20\0\1\5\5\0\1\6\2\0\1\6\1\27"+
+    "\1\31\2\6\20\0\1\6\5\0\1\32\2\0\1\33"+
+    "\1\34\3\32\20\0\1\32\3\0\1\5\1\6\5\0"+
+    "\1\35\3\0\1\6\24\0\2\11\1\0\10\11\2\36"+
+    "\1\0\1\37\1\0\1\37\1\40\2\0\1\37\1\0"+
+    "\1\22\1\0\1\11\5\0\1\12\1\11\1\0\1\12"+
+    "\1\41\1\42\2\12\3\11\2\36\1\0\1\37\1\0"+
+    "\1\37\1\40\2\0\1\37\1\0\1\22\1\0\1\12"+
+    "\5\0\2\13\1\0\5\13\2\11\1\13\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\13\5\0\2\14\1\0\5\14\3\11\2\14"+
+    "\2\0\2\43\1\44\2\0\1\43\1\0\1\22\1\0"+
+    "\1\14\5\0\1\15\1\14\1\0\1\45\1\46\3\15"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\15\5\0\2\16\1\0\5\16\5\0"+
+    "\1\16\3\0\1\40\6\0\1\16\5\0\2\47\1\0"+
+    "\5\47\3\11\2\14\1\50\3\0\1\47\4\0\1\22"+
+    "\1\0\1\47\5\0\2\20\1\0\5\20\20\0\1\20"+
+    "\5\0\2\21\1\0\5\21\20\0\1\21\5\0\2\22"+
+    "\1\0\5\22\3\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\51\1\52\1\22\1\0\1\22\5\0"+
+    "\2\23\1\0\5\23\17\0\2\23\5\0\2\24\1\0"+
+    "\5\24\20\0\1\24\2\0\1\4\1\53\1\54\1\4"+
+    "\2\0\1\4\1\0\1\26\2\4\1\0\1\54\16\0"+
+    "\1\4\12\0\1\55\1\56\24\0\1\4\1\53\1\54"+
+    "\1\5\2\0\1\5\1\27\1\30\2\5\1\0\1\54"+
+    "\16\0\1\5\2\0\1\4\1\53\1\54\1\6\2\0"+
+    "\1\6\1\27\1\31\2\6\1\0\1\54\16\0\1\6"+
+    "\5\0\1\33\2\0\1\33\1\34\3\33\20\0\1\33"+
+    "\10\0\1\57\32\0\2\36\1\0\5\36\3\11\2\36"+
+    "\2\0\2\60\1\40\2\0\1\60\1\0\1\22\1\0"+
+    "\1\36\5\0\2\37\1\0\5\37\3\11\13\0\1\11"+
+    "\1\0\1\37\5\0\2\40\1\0\5\40\3\11\2\36"+
+    "\1\50\3\0\1\40\4\0\1\22\1\0\1\40\5\0"+
+    "\2\11\1\0\2\11\1\61\1\62\4\11\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\11\2\0\1\4\1\53\1\54\1\12\1\11"+
+    "\1\0\1\12\1\41\1\42\2\12\1\11\1\63\1\11"+
+    "\2\36\1\0\1\37\1\0\1\37\1\40\2\0\1\37"+
+    "\1\0\1\22\1\0\1\12\5\0\2\43\1\0\5\43"+
+    "\3\0\2\14\13\0\1\43\5\0\2\44\1\0\5\44"+
+    "\3\11\2\14\1\50\3\0\1\44\4\0\1\22\1\0"+
+    "\1\44\5\0\1\45\1\14\1\0\1\45\1\46\3\45"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\45\5\0\2\14\1\0\1\64\4\14"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\14\5\0\2\50\1\0\5\50\5\0"+
+    "\1\50\3\0\1\40\6\0\1\50\5\0\2\51\1\0"+
+    "\5\51\3\11\2\36\4\0\1\40\4\0\1\22\1\0"+
+    "\1\51\5\0\2\52\1\0\5\52\16\0\1\51\1\0"+
+    "\1\52\2\0\1\4\2\0\1\53\2\0\1\53\1\65"+
+    "\1\66\2\53\20\0\1\53\5\0\1\54\2\0\1\54"+
+    "\1\65\1\67\2\54\20\0\1\54\2\0\1\4\1\53"+
+    "\1\54\5\0\1\70\3\0\1\54\32\0\1\56\1\71"+
+    "\26\0\1\57\2\0\1\57\1\0\3\57\20\0\1\57"+
+    "\5\0\2\60\1\0\5\60\3\0\2\36\13\0\1\60"+
+    "\2\0\1\4\1\53\1\54\2\11\1\0\2\11\1\72"+
+    "\3\11\1\63\1\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\11\5\0"+
+    "\2\11\1\0\3\11\1\62\1\73\3\11\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\11\5\0\1\63\1\11\1\0\1\63\1\74"+
+    "\1\75\2\63\3\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\63\5\0"+
+    "\1\64\1\14\1\0\1\64\1\14\3\64\3\11\2\14"+
+    "\2\0\2\43\1\44\2\0\1\43\1\0\1\22\1\0"+
+    "\1\64\12\0\1\55\25\0\1\4\1\53\1\54\1\53"+
+    "\2\0\1\53\1\65\1\66\2\53\1\0\1\54\16\0"+
+    "\1\53\2\0\1\4\1\53\2\54\2\0\1\54\1\65"+
+    "\1\67\2\54\1\0\1\54\16\0\1\54\3\0\1\53"+
+    "\1\54\5\0\1\70\3\0\1\54\22\0\1\53\1\54"+
+    "\2\11\1\0\2\11\1\72\3\11\1\63\1\11\2\36"+
+    "\1\0\1\37\1\0\1\37\1\40\2\0\1\37\1\0"+
+    "\1\22\1\0\1\11\5\0\2\11\1\0\2\11\1\61"+
+    "\5\11\2\36\1\0\1\37\1\0\1\37\1\40\2\0"+
+    "\1\37\1\0\1\22\1\0\1\11\2\0\1\4\1\53"+
+    "\1\54\1\63\1\11\1\0\1\63\1\74\1\75\2\63"+
+    "\1\11\1\63\1\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\63";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[396];
+    int [] result = new int[1650];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -329,7 +464,7 @@ public final class StandardTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -340,11 +475,12 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\1\0\1\11\13\1\1\0\1\1\1\0\1\1\1\0"+
-    "\2\1\2\0\1\1\1\0";
+    "\1\0\1\11\22\1\1\0\4\1\1\0\1\1\2\0"+
+    "\1\1\1\0\3\1\1\0\3\1\1\0\2\1\1\0"+
+    "\2\1\2\0\1\1\1\0\7\1\1\0\1\11\4\1";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;
@@ -401,11 +537,11 @@ public final class StandardTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -447,6 +583,9 @@ public final class StandardTokenizerImpl {
 
   /** Hangul token type */
   public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+  
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = StandardTokenizer.EMOJI;
 
   /** Character count processed so far */
   public final int yychar()
@@ -492,7 +631,7 @@ public final class StandardTokenizerImpl {
     char [] map = new char[0x110000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2836) {
+    while (i < 4122) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -500,6 +639,8 @@ public final class StandardTokenizerImpl {
     return map;
   }
 
+/* -------------------------------------------------------------------------------- */
+/* Begin Lucene-specific disable-buffer-expansion modifications to skeleton.default */
 
   /**
    * Refills the input buffer.
@@ -527,32 +668,45 @@ public final class StandardTokenizerImpl {
 
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
+    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
+    if (requested == 0) {
+      return true;
     }
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
-        if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+        if (numRead == requested) { // We might have requested too few chars to encode a full Unicode character.
           --zzEndRead;
           zzFinalHighSurrogate = 1;
-          if (totalRead == 1) { return true; }
+          if (numRead == 1) {
+            return true;
+          }
+        } else {                    // There is room in the buffer for at least one more char
+          int c = zzReader.read();  // Expecting to read a low surrogate char
+          if (c == -1) {
+            return true;
+          } else {
+            zzBuffer[zzEndRead++] = (char)c;
+            return false;
+          }
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
+/* End Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+/* ------------------------------------------------------------------------------ */
     
   /**
    * Closes the input stream.
@@ -773,49 +927,62 @@ public final class StandardTokenizerImpl {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
-          }
-        case 9: break;
-        case 2: 
-          { return WORD_TYPE;
-          }
-        case 10: break;
-        case 3: 
-          { return HANGUL_TYPE;
-          }
-        case 11: break;
-        case 4: 
-          { return NUMERIC_TYPE;
-          }
-        case 12: break;
-        case 5: 
-          { return KATAKANA_TYPE;
-          }
-        case 13: break;
-        case 6: 
-          { return IDEOGRAPHIC_TYPE;
-          }
-        case 14: break;
-        case 7: 
-          { return HIRAGANA_TYPE;
-          }
-        case 15: break;
-        case 8: 
-          { return SOUTH_EAST_ASIAN_TYPE;
-          }
-        case 16: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
               {
                 return YYEOF;
               }
-          } 
-          else {
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */
+            } 
+            // fall through
+          case 10: break;
+          case 2: 
+            { return EMOJI_TYPE;
+            } 
+            // fall through
+          case 11: break;
+          case 3: 
+            { return WORD_TYPE;
+            } 
+            // fall through
+          case 12: break;
+          case 4: 
+            { return HANGUL_TYPE;
+            } 
+            // fall through
+          case 13: break;
+          case 5: 
+            { return NUMERIC_TYPE;
+            } 
+            // fall through
+          case 14: break;
+          case 6: 
+            { return KATAKANA_TYPE;
+            } 
+            // fall through
+          case 15: break;
+          case 7: 
+            { return IDEOGRAPHIC_TYPE;
+            } 
+            // fall through
+          case 16: break;
+          case 8: 
+            { return HIRAGANA_TYPE;
+            } 
+            // fall through
+          case 17: break;
+          case 9: 
+            { return SOUTH_EAST_ASIAN_TYPE;
+            } 
+            // fall through
+          case 18: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
index a1e7b17..e95a9b4 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
@@ -34,12 +34,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %integer
 %final
 %public
@@ -48,22 +49,67 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %char
 %buffer 255
 
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx   = [\p{WB:HebrewLetter}\p{WB:ALetter}]                       [\p{WB:Format}\p{WB:Extend}]*
-NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx          = \p{WB:Katakana}                                           [\p{WB:Format}\p{WB:Extend}]* 
-MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      [\p{WB:Format}\p{WB:Extend}]* 
-MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       [\p{WB:Format}\p{WB:Extend}]*
-HanEx               = \p{Script:Han}                                            [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx          = \p{Script:Hiragana}                                       [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx       = \p{WB:Single_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx       = \p{WB:Double_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator}                                  [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx    = \p{LB:Complex_Context}                                    [\p{WB:Format}\p{WB:Extend}]*
+
+//////////////////////////////////////////////////////////////////////////
+// Begin Emoji Macros - see documentation below, near the EMOJI_TYPE rule
+
+// TODO: Remove this include file when JFlex supports these properties directly (in Unicode 11.0+)
+%include ../../../../../../data/jflex/UnicodeEmojiProperties.jflex
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+//   \uFE0E (Text Presentation Selector) and \uFE0F (Emoji Presentation Selector) - included in \p{WB:Extend}
+//   - are explicitly excluded here so that we can properly handle Emoji sequences.
+//
+ExtFmtZwjSansPresSel = [[\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]--[\uFE0E\uFE0F]]*
+
+KeyCapBaseChar = [0-9#*]
+KeyCapBaseCharEx = {KeyCapBaseChar} {ExtFmtZwjSansPresSel}
+KeyCap = \u20E3
+KeyCapEx = {KeyCap} {ExtFmtZwjSansPresSel}
+
+// # \u3030 = WAVY DASH; \u303D = PART ALTERNATION MARK
+AccidentalEmoji = [©®™\u3030\u303D]
+EmojiRKAM = ( \p{WB:Regional_Indicator} | {KeyCapBaseChar} | {AccidentalEmoji} | {Emoji_Modifier} )
+
+// Unlike Unicode properties, macros are not allowed in character classes, so we achieve set difference
+// by applying DeMorgan: the expression that matches everything of 'a' not matched by 'b' is: !(!a|b)
+// TODO: Convert this expression to character class difference when JFlex supports the properties directly (in Unicode 11.0+)
+EmojiSansRKAM = !( ! {Emoji} | {EmojiRKAM} )
+
+EmojiChar = ( {Extended_Pictographic} | {EmojiSansRKAM} )
+
+EmojiCharEx         = {EmojiChar}           {ExtFmtZwjSansPresSel}
+EmojiModifierBaseEx = {Emoji_Modifier_Base} {ExtFmtZwjSansPresSel}
+EmojiModifierEx     = {Emoji_Modifier}      {ExtFmtZwjSansPresSel}
+
+EmojiPresentationSelector = \uFE0F
+EmojiCharOrPresSeqOrModSeq = ( \p{WB:ZWJ}* {EmojiCharEx} {EmojiPresentationSelector}? ) | ( ( \p{WB:ZWJ}* {EmojiModifierBaseEx} )? {EmojiModifierEx} )
+TagSpec = [\u{E0020}-\u{E007E}]
+TagTerm = \u{E007F}
+
+// End Emoji Macros
+//////////////////////////////////////////////////////////////////////////
+
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] {ExtFmtZwj}
+AHLetterEx          = [\p{WB:ALetter}\p{WB:Hebrew_Letter}]                      {ExtFmtZwj}
+NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        {ExtFmtZwj}
+KatakanaEx          = \p{WB:Katakana}                                           {ExtFmtZwj} 
+MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      {ExtFmtZwj} 
+MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         {ExtFmtZwj}
+ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       {ExtFmtZwj}
+HanEx               = \p{Script:Han}                                            {ExtFmtZwj}
+HiraganaEx          = \p{Script:Hiragana}                                       {ExtFmtZwj}
+SingleQuoteEx       = \p{WB:Single_Quote}                                       {ExtFmtZwj}
+DoubleQuoteEx       = \p{WB:Double_Quote}                                       {ExtFmtZwj}
+HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      {ExtFmtZwj}
+RegionalIndicatorEx = \p{WB:Regional_Indicator}                                 {ExtFmtZwj}
+ComplexContextEx    = \p{LB:Complex_Context}                                    {ExtFmtZwj}
 
 %{
   /** Alphanumeric sequences */
@@ -93,6 +139,9 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
   /** Hangul token type */
   public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+  
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = StandardTokenizer.EMOJI;
 
   /** Character count processed so far */
   public final int yychar()
@@ -120,18 +169,64 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
 %%
 
-// UAX#29 WB1.   sot   ÷
-//        WB2.     ÷   eot
+// UAX#29 WB1.    sot ÷ Any
+//        WB2.    Any ÷ eot
 //
 <<EOF>> { return YYEOF; }
 
-// UAX#29 WB8.   Numeric × Numeric
-//        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-//        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
+// Instead of these: UAX#29 WB3c. ZWJ × (Glue_After_Zwj | EBG)
+//                          WB14. (E_Base | EBG) × E_Modifier
+//                          WB15. ^ (RI RI)* RI × RI
+//                          WB16. [^RI] (RI RI)* RI × RI
+//
+// We use the "emoji_sequence" rule from http://www.unicode.org/reports/tr51/tr51-14.html (Unicode 11.0)
+// and the Emoji data from http://unicode.org/Public/emoji/11.0/emoji-data.txt (in included file UnicodeEmojiProperties.jflex)
+// 
+// emoji_sequence :=
+//    Top-level EBNF           Expanded #1                       Expanded #2                       Expanded #3
+//    ---------------------    ----------------------------      -----------------------------     ----------------------------------------------
+//      emoji_core_sequence      emoji_combining_sequence          emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_keycap_sequence           | [0-9#*] \u{FE0F 20E3}      [1]
+//                             | emoji_modifier_sequence                                           | \p{Emoji_Modifier_Base} \p{Emoji_Modifier}
+//                             | emoji_flag_sequence                                               | \p{WB:Regional_Indicator}{2}               )
+//
+//    | emoji_zwj_sequence       emoji_zwj_element                 emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+//                             ( ZWJ emoji_zwj_element )+                                          ( \p{WB:ZWJ} ^^ )+
+// 
+//    | emoji_tag_sequence     tag_base                            emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+//                             tag_spec                                                            [\u{E0020}-\u{E007E}]+
+//                             tag_term                                                            \u{E007F}
+//
+// [1] https://unicode.org/Public/emoji/11.0/emoji-test.txt includes key cap sequences 
+//     WITHOUT \uFE0F (emoji presentation indicator), annotating them as "non-fully-qualified";
+//     TR#51 says about non-fully-qualified *ZWJ sequences* that implementations may
+//     choose whether to support them for segmentation.  This implementation will
+//     recognize /[0-9#*]\u20E3/ - i.e. without \uFE0F - as Emoji. 
+//
+// See also: http://www.unicode.org/L2/L2016/16315-handling-seg-emoji.pdf
+//           https://docs.google.com/document/d/1yDZ5TUZNVVKaM9zYCCLbRIAKGNZANsAGl0bcNzGGvn8
+//
+//     In particular, the above docs recommend a modified UAX#29 WB3c rule (covered by TR#51's "emoji_zwj_sequence"):
+//
+//         WB3c′ ZWJ × (Extended_Pictographic | EmojiNRK)
+//
+  {EmojiCharOrPresSeqOrModSeq} ( ( \p{WB:ZWJ} {EmojiCharOrPresSeqOrModSeq} )* | {TagSpec}+ {TagTerm} ) 
+| {KeyCapBaseCharEx} {EmojiPresentationSelector}? {KeyCapEx} 
+| {RegionalIndicatorEx}{2} 
+  { return EMOJI_TYPE; }
+
+// UAX#29 WB8.    Numeric × Numeric
+//        WB11.   Numeric (MidNum | MidNumLetQ) × Numeric
+//        WB12.   Numeric × (MidNum | MidNumLetQ) Numeric
+//        WB13a.  (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b.  ExtendNumLet × (AHLetter | Numeric | Katakana)
 //
-{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}* 
+{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
   { return NUMERIC_TYPE; }
 
 // subset of the below for typing purposes only!
@@ -141,28 +236,28 @@ ComplexContextEx    = \p{LB:Complex_Context}
 {KatakanaEx}+
   { return KATAKANA_TYPE; }
 
-// UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-//        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-//        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-//        WB7a.  Hebrew_Letter × Single_Quote
-//        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
-//        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
-//        WB9.   (ALetter | Hebrew_Letter) × Numeric
-//        WB10.  Numeric × (ALetter | Hebrew_Letter)
-//        WB13.  Katakana × Katakana
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
-//
-{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+// UAX#29 WB5.    AHLetter × AHLetter
+//        WB6.    AHLetter × (MidLetter | MidNumLetQ) AHLetter
+//        WB7.    AHLetter (MidLetter | MidNumLetQ) × AHLetter
+//        WB7a.   Hebrew_Letter × Single_Quote
+//        WB7b.   Hebrew_Letter × Double_Quote Hebrew_Letter
+//        WB7c.   Hebrew_Letter Double_Quote × Hebrew_Letter
+//        WB9.    AHLetter × Numeric
+//        WB10.   Numeric × AHLetter
+//        WB13.   Katakana × Katakana
+//        WB13a.  (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b.  ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
+//
+{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                     | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                      )+
                    )
-({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                     | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                      )+
                    )
 )*
@@ -172,13 +267,13 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
 // From UAX #29:
 //
-//    [C]haracters with the Line_Break property values of Contingent_Break (CB), 
-//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word 
+//    [C]haracters with the Line_Break property values of Contingent_Break (CB),
+//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
 //    boundary property values based on criteria outside of the scope of this
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 9.0, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC ( ๏ฟผ ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -191,17 +286,14 @@ ComplexContextEx    = \p{LB:Complex_Context}
 //
 {ComplexContextEx}+ { return SOUTH_EAST_ASIAN_TYPE; }
 
-// UAX#29 WB14.  Any ÷ Any
+// UAX#29 WB999.  Any ÷ Any
 //
 {HanEx} { return IDEOGRAPHIC_TYPE; }
 {HiraganaEx} { return HIRAGANA_TYPE; }
 
-
-// UAX#29 WB3.   CR × LF
-//        WB3a.  (Newline | CR | LF) ÷
-//        WB3b.  ÷ (Newline | CR | LF)
-//        WB13c. Regional_Indicator × Regional_Indicator
-//        WB14.  Any ÷ Any
+// UAX#29 WB3.    CR × LF
+//        WB3a.   (Newline | CR | LF) ÷
+//        WB3b.   ÷ (Newline | CR | LF)
+//        WB999.  Any ÷ Any
 //
-{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
-  { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */ }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
index 6abbc2b..615b565 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
@@ -18,8 +18,11 @@ package org.apache.lucene.analysis.standard;
 
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -27,6 +30,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TestUtil;
 
@@ -282,7 +286,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   }
   
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
+    WordBreakTestUnicode_9_0_0 wordBreakTest = new WordBreakTestUnicode_9_0_0();
     wordBreakTest.test(a);
   }
   
@@ -358,8 +362,80 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1.,2", new String[] { "3_1", "2" });
   }
 
-
-
+  /** simple emoji */
+  public void testEmoji() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "๐Ÿ’ฉ ๐Ÿ’ฉ๐Ÿ’ฉ",
+        new String[] { "๐Ÿ’ฉ", "๐Ÿ’ฉ", "๐Ÿ’ฉ" },
+        new String[] { "<EMOJI>", "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence */
+  public void testEmojiSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ‘ฉ",
+        new String[] { "๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ‘ฉ" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence with fitzpatrick modifier */
+  public void testEmojiSequenceWithModifier() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "๐Ÿ‘จ๐Ÿผโ€โš•๏ธ",
+        new String[] { "๐Ÿ‘จ๐Ÿผโ€โš•๏ธ" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** regional indicator */
+  public void testEmojiRegionalIndicator() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "๐Ÿ‡บ๐Ÿ‡ธ๐Ÿ‡บ๐Ÿ‡ธ",
+        new String[] { "๐Ÿ‡บ๐Ÿ‡ธ", "๐Ÿ‡บ๐Ÿ‡ธ" },
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** variation sequence */
+  public void testEmojiVariationSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#๏ธโƒฃ",
+        new String[] { "#๏ธโƒฃ" },
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3๏ธโƒฃ",
+        new String[] { "3๏ธโƒฃ",},
+        new String[] { "<EMOJI>" });
+
+    // text presentation sequences
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",
+        new String[] { },
+        new String[] { });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
+        new String[] { "3\uFE0E",},
+        new String[] { "<NUM>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
+        new String[] { "\u2B55",},
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
+        new String[] { "\u2B55", "\u200D\u2B55"},
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  public void testEmojiTagSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "๐Ÿด๓ ง๓ ข๓ ฅ๓ ฎ๓ ง๓ ฟ",
+        new String[] { "๐Ÿด๓ ง๓ ข๓ ฅ๓ ฎ๓ ง๓ ฟ" },
+        new String[] { "<EMOJI>" });
+  }
+
+  public void testEmojiTokenization() throws Exception {
+    // simple emoji around latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "poo๐Ÿ’ฉpoo",
+        new String[] { "poo", "๐Ÿ’ฉ", "poo" },
+        new String[] { "<ALPHANUM>", "<EMOJI>", "<ALPHANUM>" });
+    // simple emoji around non-latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "๐Ÿ’ฉไธญๅœ‹๐Ÿ’ฉ",
+        new String[] { "๐Ÿ’ฉ", "ไธญ", "ๅœ‹", "๐Ÿ’ฉ" },
+        new String[] { "<EMOJI>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<EMOJI>" });
+  }
+  
+  public void testUnicodeEmojiTests() throws Exception {
+    EmojiTokenizationTestUnicode_11_0 emojiTest = new EmojiTokenizationTestUnicode_11_0();
+    emojiTest.test(a);
+  }
+  
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     Analyzer analyzer = new StandardAnalyzer();
@@ -416,4 +492,53 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "ab cd toolong xy z", new String[]{"ab", "cd", "toolo", "ng", "xy", "z"});
     a.close();
   }
+
+  public void testSplitSurrogatePairWithSpoonFeedReader() throws Exception {
+    String text = "12345678\ud800\udf00"; // U+D800 U+DF00 = U+10300 = 𐌀 (OLD ITALIC LETTER A)
+    
+    // Collect tokens with normal reader
+    StandardAnalyzer a = new StandardAnalyzer();
+    TokenStream ts = a.tokenStream("dummy", text);
+    List<String> tokens = new ArrayList<>();
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+    ts.reset();
+    while (ts.incrementToken()) {
+      tokens.add(termAtt.toString());
+    }
+    ts.end();
+    ts.close();
+
+    // Tokens from a spoon-feed reader should be the same as from a normal reader
+    // The 9th char is a high surrogate, so the 9-max-chars spoon-feed reader will split the surrogate pair at a read boundary
+    Reader reader = new SpoonFeedMaxCharsReaderWrapper(9, new StringReader(text));
+    ts = a.tokenStream("dummy", reader);
+    termAtt = ts.addAttribute(CharTermAttribute.class);
+    ts.reset();
+    for (int tokenNum = 0 ; ts.incrementToken() ; ++tokenNum) {
+      assertEquals("token #" + tokenNum + " mismatch: ", termAtt.toString(), tokens.get(tokenNum));
+    }
+    ts.end();
+    ts.close();
+  }
+}
+
+class SpoonFeedMaxCharsReaderWrapper extends Reader {
+  private final Reader in;
+  private final int maxChars; 
+
+  public SpoonFeedMaxCharsReaderWrapper(int maxChars, Reader in) {
+    this.in = in;
+    this.maxChars = maxChars;
+  }
+
+  @Override
+  public void close() throws IOException {
+    in.close();
+  }
+
+  /** Reads at most {@code maxChars} chars per call, delegating to the wrapped reader */
+  @Override
+  public int read(char[] cbuf, int off, int len) throws IOException {
+    return in.read(cbuf, off, Math.min(maxChars, len));
+  }
 }


[03/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
deleted file mode 100644
index 4a3731e..0000000
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
+++ /dev/null
@@ -1,5537 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
-
-/**
- * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
- * from: http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
- *
- * WordBreakTest.txt indicates the points in the provided character sequences
- * at which conforming implementations must and must not break words.  This
- * class tests for expected token extraction from each of the test sequences
- * in WordBreakTest.txt, where the expected tokens are those character
- * sequences bounded by word breaks and containing at least one character
- * from one of the following character sets:
- *
- *    \p{Script = Han}                (From http://www.unicode.org/Public/6.3.0/ucd/Scripts.txt)
- *    \p{Script = Hiragana}
- *    \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.3.0/ucd/LineBreak.txt)
- *    \p{WordBreak = ALetter}         (From http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt)
- *    \p{WordBreak = Hebrew_Letter}
- *    \p{WordBreak = Katakana}
- *    \p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
- *    [\uFF10-\uFF19]                (Full-width Arabic digits)
- */
-@Ignore
-public class WordBreakTestUnicode_6_3_0 extends BaseTokenStreamTestCase {
-
-  public void test(Analyzer analyzer) throws Exception {
-    // รท 0001 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0001",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
-                     new String[] {  });
-
-    // รท 0001 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\r",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\r",
-                     new String[] {  });
-
-    // รท 0001 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\n",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\n",
-                     new String[] {  });
-
-    // รท 0001 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u000B",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
-                     new String[] {  });
-
-    // รท 0001 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 0001 ร— 0308 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 0001 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 0001 ร— 0308 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 0001 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u003A",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
-                     new String[] {  });
-
-    // รท 0001 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u002C",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
-                     new String[] {  });
-
-    // รท 0001 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u002E",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
-                     new String[] {  });
-
-    // รท 0001 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 0001 ร— 0308 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 0001 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u005F",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
-                     new String[] {  });
-
-    // รท 0001 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 0001 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 0001 ร— 0308 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 0001 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\"",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\"",
-                     new String[] {  });
-
-    // รท 0001 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0027",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
-                     new String[] {  });
-
-    // รท 0001 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u00AD",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 0001 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0300",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
-                     new String[] {  });
-
-    // รท 0001 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 0001 ร— 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 0001 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0001",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0001",
-                     new String[] {  });
-
-    // รท 000D รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\r",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\r",
-                     new String[] {  });
-
-    // รท 000D ร— 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) ร— [3.0] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\n",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\n",
-                     new String[] {  });
-
-    // รท 000D รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u000B",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u000B",
-                     new String[] {  });
-
-    // รท 000D รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000D รท 0308 รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000D รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000D รท 0308 รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000D รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u003A",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u003A",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u002C",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u002C",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 002E ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u002E",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u002E",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0030 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000D ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000D ÷ 005F ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u005F",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u005F",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 1F1E6 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000D ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000D ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\"",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\"",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0027",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0027",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u00AD",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 × 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0300",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0308 × 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0300",
-                     new String[] {  });
-
-    // ÷ 000D ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000D ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000D ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0001 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0001",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0001",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 000D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\r",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\r",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 000A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\n",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\n",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 000B ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u000B",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u000B",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 3031 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 000A ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 000A ÷ 0041 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 000A ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 000A ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u003A",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u003A",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u002C",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u002C",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u002E",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u002E",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000A ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000A ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u005F",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u005F",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000A ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000A ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\"",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\"",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0027",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0027",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u00AD",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0300",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0300",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000A ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0001",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\r",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\r",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\n",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\n",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u000B",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 000B ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 000B ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 000B ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 000B ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u003A",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u002C",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u002E",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000B ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000B ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u005F",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000B ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000B ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\"",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\"",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0027",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u00AD",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0300",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000B ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 3031 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0001",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\r",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\r",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\n",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\n",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u000B",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u3031",
-                     new String[] { "\u3031\u3031" });
-
-    // ÷ 3031 × 0308 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
-                     new String[] { "\u3031\u0308\u3031" });
-
-    // ÷ 3031 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0041",
-                     new String[] { "\u3031", "\u0041" });
-
-    // ÷ 3031 × 0308 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
-                     new String[] { "\u3031\u0308", "\u0041" });
-
-    // ÷ 3031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u003A",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u002C",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u002E",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0030",
-                     new String[] { "\u3031", "\u0030" });
-
-    // ÷ 3031 × 0308 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
-                     new String[] { "\u3031\u0308", "\u0030" });
-
-    // ÷ 3031 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u005F",
-                     new String[] { "\u3031\u005F" });
-
-    // ÷ 3031 × 0308 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
-                     new String[] { "\u3031\u0308\u005F" });
-
-    // ÷ 3031 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u05D0",
-                     new String[] { "\u3031", "\u05D0" });
-
-    // ÷ 3031 × 0308 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
-                     new String[] { "\u3031\u0308", "\u05D0" });
-
-    // ÷ 3031 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\"",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\"",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0027",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u00AD",
-                     new String[] { "\u3031\u00AD" });
-
-    // ÷ 3031 × 0308 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
-                     new String[] { "\u3031\u0308\u00AD" });
-
-    // ÷ 3031 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0300",
-                     new String[] { "\u3031\u0300" });
-
-    // ÷ 3031 × 0308 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
-                     new String[] { "\u3031\u0308\u0300" });
-
-    // ÷ 3031 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
-                     new String[] { "\u3031", "\u0061\u2060" });
-
-    // ÷ 3031 × 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
-                     new String[] { "\u3031\u0308", "\u0061\u2060" });
-
-    // ÷ 3031 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
-                     new String[] { "\u3031", "\u0061" });
-
-    // ÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // ÷ 3031 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 3031 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 3031 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
-                     new String[] { "\u3031", "\u0031" });
-
-    // ÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // ÷ 0041 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0001",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 0001 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\r",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 000D ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\r",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\n",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 000A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\n",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u000B",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 000B ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u3031",
-                     new String[] { "\u0041", "\u3031" });
-
-    // ÷ 0041 × 0308 ÷ 3031 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
-                     new String[] { "\u0041\u0308", "\u3031" });
-
-    // ÷ 0041 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0041",
-                     new String[] { "\u0041\u0041" });
-
-    // ÷ 0041 × 0308 × 0041 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
-                     new String[] { "\u0041\u0308\u0041" });
-
-    // ÷ 0041 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u003A",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u002C",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u002E",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 002E ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0030",
-                     new String[] { "\u0041\u0030" });
-
-    // ÷ 0041 × 0308 × 0030 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
-                     new String[] { "\u0041\u0308\u0030" });
-
-    // ÷ 0041 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u005F",
-                     new String[] { "\u0041\u005F" });
-
-    // ÷ 0041 × 0308 × 005F ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
-                     new String[] { "\u0041\u0308\u005F" });
-
-    // ÷ 0041 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u05D0",
-                     new String[] { "\u0041\u05D0" });
-
-    // ÷ 0041 × 0308 × 05D0 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
-                     new String[] { "\u0041\u0308\u05D0" });
-
-    // ÷ 0041 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\"",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 0022 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\"",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0027",
-                     new String[] { "\u0041" });
-
-    // ÷ 0041 × 0308 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
-                     new String[] { "\u0041\u0308" });
-
-    // ÷ 0041 × 00AD ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u00AD",
-                     new String[] { "\u0041\u00AD" });
-
-    // ÷ 0041 × 0308 × 00AD ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u00AD",
-                     new String[] { "\u0041\u0308\u00AD" });
-
-    // ÷ 0041 × 0300 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0300",
-                     new String[] { "\u0041\u0300" });
-
-    // ÷ 0041 × 0308 × 0300 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0300",
-                     new String[] { "\u0041\u0308\u0300" });
-
-    // ÷ 0041 × 0061 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u2060",
-                     new String[] { "\u0041\u0061\u2060" });
-
-    // ÷ 0041 × 0308 × 0061 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u2060",
-                     new String[] { "\u0041\u0308\u0061\u2060" });
-
-    // ÷ 0041 × 0061 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u003A",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u003A",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0061 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u0027",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u0027\u2060",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0061 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u002C",
-                     new String[] { "\u0041\u0061" });
-
-    // ÷ 0041 × 0308 × 0061 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u002C",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // ÷ 0041 × 0031 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u003A",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 003A ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u003A",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // ÷ 0041 × 0031 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u0027",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 0027 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u0027",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // ÷ 0041 × 0031 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u002C",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 002C ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002C",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // ÷ 0041 × 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u002E\u2060",
-                     new String[] { "\u0041\u0031" });
-
-    // ÷ 0041 × 0308 × 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // ÷ 003A ÷ 0001 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0001",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 0001 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0001",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 000D ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\r",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 000D ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\r",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 000A ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\n",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 000A ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\n",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 000B ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u000B",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 000B ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u000B",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 3031 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 003A × 0308 ÷ 3031 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 003A ÷ 0041 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 003A × 0308 ÷ 0041 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 003A ÷ 003A ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u003A",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 003A ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u003A",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 002C ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u002C",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 002C ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u002C",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 002E ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u002E",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 002E ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u002E",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 0030 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 003A × 0308 ÷ 0030 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 003A ÷ 005F ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u005F",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 005F ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u005F",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 1F1E6 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 05D0 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 003A × 0308 ÷ 05D0 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 003A ÷ 0022 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\"",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 0022 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\"",
-                     new String[] {  });
-
-    // ÷ 003A ÷ 0027 ÷  #  ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0027",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 ÷ 0027 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0027",
-                     new String[] {  });
-
-    // ÷ 003A × 00AD ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u00AD",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 × 00AD ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u00AD",
-                     new String[] {  });
-
-    // ÷ 003A × 0300 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0300",
-                     new String[] {  });
-
-    // ÷ 003A × 0308 × 0300 ÷  #  ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0300",
-                     new Str

<TRUNCATED>

[04/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
new file mode 100644
index 0000000..5e99ef4
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
@@ -0,0 +1,10756 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by generateEmojiTokenizationTest.pl
+ * from: http://www.unicode.org/Public/emoji/11.0/emoji-test.txt
+ *
+ * emoji-test.txt contains emoji char sequences, which are represented as
+ * tokenization tests in this class.
+ * 
+ */
+@Ignore
+public class EmojiTokenizationTestUnicode_11_0 extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception { // runs every entry of the `tests` array through the supplied analyzer
+    for (int i = 0 ; i < tests.length ; i += 2) { // entries come in pairs: tests[i] = source line from emoji-test.txt, tests[i + 1] = escaped input text
+      String test = tests[i + 1];
+      try {
+        assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" }); // presumably: expect the whole input back as one term with token type <EMOJI> (confirm against BaseTokenStreamTestCase)
+      } catch (Throwable t) { // Throwable, so Errors such as AssertionError are wrapped as well
+        throw new Exception("Failed to tokenize \"" + tests[i] + "\":", t);         // the message carries the descriptive line so the failing sequence can be identified
+      }
+    }
+  }
+
+  private String[] tests = new String[] {
+    "1F600                                      ; fully-qualified     # ๐Ÿ˜€ grinning face",
+    "\uD83D\uDE00",
+
+    "1F601                                      ; fully-qualified     # ๐Ÿ˜ beaming face with smiling eyes",
+    "\uD83D\uDE01",
+
+    "1F602                                      ; fully-qualified     # ๐Ÿ˜‚ face with tears of joy",
+    "\uD83D\uDE02",
+
+    "1F923                                      ; fully-qualified     # ๐Ÿคฃ rolling on the floor laughing",
+    "\uD83E\uDD23",
+
+    "1F603                                      ; fully-qualified     # ๐Ÿ˜ƒ grinning face with big eyes",
+    "\uD83D\uDE03",
+
+    "1F604                                      ; fully-qualified     # ๐Ÿ˜„ grinning face with smiling eyes",
+    "\uD83D\uDE04",
+
+    "1F605                                      ; fully-qualified     # ๐Ÿ˜… grinning face with sweat",
+    "\uD83D\uDE05",
+
+    "1F606                                      ; fully-qualified     # ๐Ÿ˜† grinning squinting face",
+    "\uD83D\uDE06",
+
+    "1F609                                      ; fully-qualified     # ๐Ÿ˜‰ winking face",
+    "\uD83D\uDE09",
+
+    "1F60A                                      ; fully-qualified     # ๐Ÿ˜Š smiling face with smiling eyes",
+    "\uD83D\uDE0A",
+
+    "1F60B                                      ; fully-qualified     # ๐Ÿ˜‹ face savoring food",
+    "\uD83D\uDE0B",
+
+    "1F60E                                      ; fully-qualified     # ๐Ÿ˜Ž smiling face with sunglasses",
+    "\uD83D\uDE0E",
+
+    "1F60D                                      ; fully-qualified     # ๐Ÿ˜ smiling face with heart-eyes",
+    "\uD83D\uDE0D",
+
+    "1F618                                      ; fully-qualified     # ๐Ÿ˜˜ face blowing a kiss",
+    "\uD83D\uDE18",
+
+    "1F970                                      ; fully-qualified     # ๐Ÿฅฐ smiling face with 3 hearts",
+    "\uD83E\uDD70",
+
+    "1F617                                      ; fully-qualified     # ๐Ÿ˜— kissing face",
+    "\uD83D\uDE17",
+
+    "1F619                                      ; fully-qualified     # ๐Ÿ˜™ kissing face with smiling eyes",
+    "\uD83D\uDE19",
+
+    "1F61A                                      ; fully-qualified     # ๐Ÿ˜š kissing face with closed eyes",
+    "\uD83D\uDE1A",
+
+    "263A FE0F                                  ; fully-qualified     # โ˜บ๏ธ smiling face",
+    "\u263A\uFE0F",
+
+    "263A                                       ; non-fully-qualified # โ˜บ smiling face",
+    "\u263A",
+
+    "1F642                                      ; fully-qualified     # ๐Ÿ™‚ slightly smiling face",
+    "\uD83D\uDE42",
+
+    "1F917                                      ; fully-qualified     # ๐Ÿค— hugging face",
+    "\uD83E\uDD17",
+
+    "1F929                                      ; fully-qualified     # ๐Ÿคฉ star-struck",
+    "\uD83E\uDD29",
+
+    "1F914                                      ; fully-qualified     # ๐Ÿค” thinking face",
+    "\uD83E\uDD14",
+
+    "1F928                                      ; fully-qualified     # ๐Ÿคจ face with raised eyebrow",
+    "\uD83E\uDD28",
+
+    "1F610                                      ; fully-qualified     # ๐Ÿ˜ neutral face",
+    "\uD83D\uDE10",
+
+    "1F611                                      ; fully-qualified     # ๐Ÿ˜‘ expressionless face",
+    "\uD83D\uDE11",
+
+    "1F636                                      ; fully-qualified     # ๐Ÿ˜ถ face without mouth",
+    "\uD83D\uDE36",
+
+    "1F644                                      ; fully-qualified     # ๐Ÿ™„ face with rolling eyes",
+    "\uD83D\uDE44",
+
+    "1F60F                                      ; fully-qualified     # ๐Ÿ˜ smirking face",
+    "\uD83D\uDE0F",
+
+    "1F623                                      ; fully-qualified     # ๐Ÿ˜ฃ persevering face",
+    "\uD83D\uDE23",
+
+    "1F625                                      ; fully-qualified     # ๐Ÿ˜ฅ sad but relieved face",
+    "\uD83D\uDE25",
+
+    "1F62E                                      ; fully-qualified     # ๐Ÿ˜ฎ face with open mouth",
+    "\uD83D\uDE2E",
+
+    "1F910                                      ; fully-qualified     # ๐Ÿค zipper-mouth face",
+    "\uD83E\uDD10",
+
+    "1F62F                                      ; fully-qualified     # ๐Ÿ˜ฏ hushed face",
+    "\uD83D\uDE2F",
+
+    "1F62A                                      ; fully-qualified     # ๐Ÿ˜ช sleepy face",
+    "\uD83D\uDE2A",
+
+    "1F62B                                      ; fully-qualified     # ๐Ÿ˜ซ tired face",
+    "\uD83D\uDE2B",
+
+    "1F634                                      ; fully-qualified     # ๐Ÿ˜ด sleeping face",
+    "\uD83D\uDE34",
+
+    "1F60C                                      ; fully-qualified     # ๐Ÿ˜Œ relieved face",
+    "\uD83D\uDE0C",
+
+    "1F61B                                      ; fully-qualified     # ๐Ÿ˜› face with tongue",
+    "\uD83D\uDE1B",
+
+    "1F61C                                      ; fully-qualified     # ๐Ÿ˜œ winking face with tongue",
+    "\uD83D\uDE1C",
+
+    "1F61D                                      ; fully-qualified     # ๐Ÿ˜ squinting face with tongue",
+    "\uD83D\uDE1D",
+
+    "1F924                                      ; fully-qualified     # ๐Ÿคค drooling face",
+    "\uD83E\uDD24",
+
+    "1F612                                      ; fully-qualified     # ๐Ÿ˜’ unamused face",
+    "\uD83D\uDE12",
+
+    "1F613                                      ; fully-qualified     # ๐Ÿ˜“ downcast face with sweat",
+    "\uD83D\uDE13",
+
+    "1F614                                      ; fully-qualified     # ๐Ÿ˜” pensive face",
+    "\uD83D\uDE14",
+
+    "1F615                                      ; fully-qualified     # ๐Ÿ˜• confused face",
+    "\uD83D\uDE15",
+
+    "1F643                                      ; fully-qualified     # ๐Ÿ™ƒ upside-down face",
+    "\uD83D\uDE43",
+
+    "1F911                                      ; fully-qualified     # ๐Ÿค‘ money-mouth face",
+    "\uD83E\uDD11",
+
+    "1F632                                      ; fully-qualified     # ๐Ÿ˜ฒ astonished face",
+    "\uD83D\uDE32",
+
+    "2639 FE0F                                  ; fully-qualified     # โ˜น๏ธ frowning face",
+    "\u2639\uFE0F",
+
+    "2639                                       ; non-fully-qualified # โ˜น frowning face",
+    "\u2639",
+
+    "1F641                                      ; fully-qualified     # ๐Ÿ™ slightly frowning face",
+    "\uD83D\uDE41",
+
+    "1F616                                      ; fully-qualified     # ๐Ÿ˜– confounded face",
+    "\uD83D\uDE16",
+
+    "1F61E                                      ; fully-qualified     # ๐Ÿ˜ž disappointed face",
+    "\uD83D\uDE1E",
+
+    "1F61F                                      ; fully-qualified     # ๐Ÿ˜Ÿ worried face",
+    "\uD83D\uDE1F",
+
+    "1F624                                      ; fully-qualified     # ๐Ÿ˜ค face with steam from nose",
+    "\uD83D\uDE24",
+
+    "1F622                                      ; fully-qualified     # ๐Ÿ˜ข crying face",
+    "\uD83D\uDE22",
+
+    "1F62D                                      ; fully-qualified     # ๐Ÿ˜ญ loudly crying face",
+    "\uD83D\uDE2D",
+
+    "1F626                                      ; fully-qualified     # ๐Ÿ˜ฆ frowning face with open mouth",
+    "\uD83D\uDE26",
+
+    "1F627                                      ; fully-qualified     # ๐Ÿ˜ง anguished face",
+    "\uD83D\uDE27",
+
+    "1F628                                      ; fully-qualified     # ๐Ÿ˜จ fearful face",
+    "\uD83D\uDE28",
+
+    "1F629                                      ; fully-qualified     # ๐Ÿ˜ฉ weary face",
+    "\uD83D\uDE29",
+
+    "1F92F                                      ; fully-qualified     # ๐Ÿคฏ exploding head",
+    "\uD83E\uDD2F",
+
+    "1F62C                                      ; fully-qualified     # ๐Ÿ˜ฌ grimacing face",
+    "\uD83D\uDE2C",
+
+    "1F630                                      ; fully-qualified     # ๐Ÿ˜ฐ anxious face with sweat",
+    "\uD83D\uDE30",
+
+    "1F631                                      ; fully-qualified     # ๐Ÿ˜ฑ face screaming in fear",
+    "\uD83D\uDE31",
+
+    "1F975                                      ; fully-qualified     # ๐Ÿฅต hot face",
+    "\uD83E\uDD75",
+
+    "1F976                                      ; fully-qualified     # ๐Ÿฅถ cold face",
+    "\uD83E\uDD76",
+
+    "1F633                                      ; fully-qualified     # ๐Ÿ˜ณ flushed face",
+    "\uD83D\uDE33",
+
+    "1F92A                                      ; fully-qualified     # ๐Ÿคช zany face",
+    "\uD83E\uDD2A",
+
+    "1F635                                      ; fully-qualified     # ๐Ÿ˜ต dizzy face",
+    "\uD83D\uDE35",
+
+    "1F621                                      ; fully-qualified     # ๐Ÿ˜ก pouting face",
+    "\uD83D\uDE21",
+
+    "1F620                                      ; fully-qualified     # ๐Ÿ˜  angry face",
+    "\uD83D\uDE20",
+
+    "1F92C                                      ; fully-qualified     # ๐Ÿคฌ face with symbols on mouth",
+    "\uD83E\uDD2C",
+
+    "1F637                                      ; fully-qualified     # ๐Ÿ˜ท face with medical mask",
+    "\uD83D\uDE37",
+
+    "1F912                                      ; fully-qualified     # ๐Ÿค’ face with thermometer",
+    "\uD83E\uDD12",
+
+    "1F915                                      ; fully-qualified     # ๐Ÿค• face with head-bandage",
+    "\uD83E\uDD15",
+
+    "1F922                                      ; fully-qualified     # ๐Ÿคข nauseated face",
+    "\uD83E\uDD22",
+
+    "1F92E                                      ; fully-qualified     # ๐Ÿคฎ face vomiting",
+    "\uD83E\uDD2E",
+
+    "1F927                                      ; fully-qualified     # ๐Ÿคง sneezing face",
+    "\uD83E\uDD27",
+
+    "1F607                                      ; fully-qualified     # ๐Ÿ˜‡ smiling face with halo",
+    "\uD83D\uDE07",
+
+    "1F920                                      ; fully-qualified     # ๐Ÿค  cowboy hat face",
+    "\uD83E\uDD20",
+
+    "1F973                                      ; fully-qualified     # ๐Ÿฅณ partying face",
+    "\uD83E\uDD73",
+
+    "1F974                                      ; fully-qualified     # ๐Ÿฅด woozy face",
+    "\uD83E\uDD74",
+
+    "1F97A                                      ; fully-qualified     # ๐Ÿฅบ pleading face",
+    "\uD83E\uDD7A",
+
+    "1F925                                      ; fully-qualified     # ๐Ÿคฅ lying face",
+    "\uD83E\uDD25",
+
+    "1F92B                                      ; fully-qualified     # ๐Ÿคซ shushing face",
+    "\uD83E\uDD2B",
+
+    "1F92D                                      ; fully-qualified     # ๐Ÿคญ face with hand over mouth",
+    "\uD83E\uDD2D",
+
+    "1F9D0                                      ; fully-qualified     # ๐Ÿง face with monocle",
+    "\uD83E\uDDD0",
+
+    "1F913                                      ; fully-qualified     # ๐Ÿค“ nerd face",
+    "\uD83E\uDD13",
+
+    "1F608                                      ; fully-qualified     # ๐Ÿ˜ˆ smiling face with horns",
+    "\uD83D\uDE08",
+
+    "1F47F                                      ; fully-qualified     # ๐Ÿ‘ฟ angry face with horns",
+    "\uD83D\uDC7F",
+
+    "1F921                                      ; fully-qualified     # ๐Ÿคก clown face",
+    "\uD83E\uDD21",
+
+    "1F479                                      ; fully-qualified     # ๐Ÿ‘น ogre",
+    "\uD83D\uDC79",
+
+    "1F47A                                      ; fully-qualified     # ๐Ÿ‘บ goblin",
+    "\uD83D\uDC7A",
+
+    "1F480                                      ; fully-qualified     # ๐Ÿ’€ skull",
+    "\uD83D\uDC80",
+
+    "2620 FE0F                                  ; fully-qualified     # โ˜ ๏ธ skull and crossbones",
+    "\u2620\uFE0F",
+
+    "2620                                       ; non-fully-qualified # โ˜  skull and crossbones",
+    "\u2620",
+
+    "1F47B                                      ; fully-qualified     # ๐Ÿ‘ป ghost",
+    "\uD83D\uDC7B",
+
+    "1F47D                                      ; fully-qualified     # ๐Ÿ‘ฝ alien",
+    "\uD83D\uDC7D",
+
+    "1F47E                                      ; fully-qualified     # ๐Ÿ‘พ alien monster",
+    "\uD83D\uDC7E",
+
+    "1F916                                      ; fully-qualified     # ๐Ÿค– robot face",
+    "\uD83E\uDD16",
+
+    "1F4A9                                      ; fully-qualified     # ๐Ÿ’ฉ pile of poo",
+    "\uD83D\uDCA9",
+
+    "1F63A                                      ; fully-qualified     # ๐Ÿ˜บ grinning cat face",
+    "\uD83D\uDE3A",
+
+    "1F638                                      ; fully-qualified     # ๐Ÿ˜ธ grinning cat face with smiling eyes",
+    "\uD83D\uDE38",
+
+    "1F639                                      ; fully-qualified     # ๐Ÿ˜น cat face with tears of joy",
+    "\uD83D\uDE39",
+
+    "1F63B                                      ; fully-qualified     # ๐Ÿ˜ป smiling cat face with heart-eyes",
+    "\uD83D\uDE3B",
+
+    "1F63C                                      ; fully-qualified     # ๐Ÿ˜ผ cat face with wry smile",
+    "\uD83D\uDE3C",
+
+    "1F63D                                      ; fully-qualified     # ๐Ÿ˜ฝ kissing cat face",
+    "\uD83D\uDE3D",
+
+    "1F640                                      ; fully-qualified     # ๐Ÿ™€ weary cat face",
+    "\uD83D\uDE40",
+
+    "1F63F                                      ; fully-qualified     # ๐Ÿ˜ฟ crying cat face",
+    "\uD83D\uDE3F",
+
+    "1F63E                                      ; fully-qualified     # ๐Ÿ˜พ pouting cat face",
+    "\uD83D\uDE3E",
+
+    "1F648                                      ; fully-qualified     # ๐Ÿ™ˆ see-no-evil monkey",
+    "\uD83D\uDE48",
+
+    "1F649                                      ; fully-qualified     # ๐Ÿ™‰ hear-no-evil monkey",
+    "\uD83D\uDE49",
+
+    "1F64A                                      ; fully-qualified     # ๐Ÿ™Š speak-no-evil monkey",
+    "\uD83D\uDE4A",
+
+    "1F3FB                                      ; fully-qualified     # ๐Ÿป light skin tone",
+    "\uD83C\uDFFB",
+
+    "1F3FC                                      ; fully-qualified     # ๐Ÿผ medium-light skin tone",
+    "\uD83C\uDFFC",
+
+    "1F3FD                                      ; fully-qualified     # ๐Ÿฝ medium skin tone",
+    "\uD83C\uDFFD",
+
+    "1F3FE                                      ; fully-qualified     # ๐Ÿพ medium-dark skin tone",
+    "\uD83C\uDFFE",
+
+    "1F3FF                                      ; fully-qualified     # ๐Ÿฟ dark skin tone",
+    "\uD83C\uDFFF",
+
+    "1F476                                      ; fully-qualified     # ๐Ÿ‘ถ baby",
+    "\uD83D\uDC76",
+
+    "1F476 1F3FB                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿป baby: light skin tone",
+    "\uD83D\uDC76\uD83C\uDFFB",
+
+    "1F476 1F3FC                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿผ baby: medium-light skin tone",
+    "\uD83D\uDC76\uD83C\uDFFC",
+
+    "1F476 1F3FD                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿฝ baby: medium skin tone",
+    "\uD83D\uDC76\uD83C\uDFFD",
+
+    "1F476 1F3FE                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿพ baby: medium-dark skin tone",
+    "\uD83D\uDC76\uD83C\uDFFE",
+
+    "1F476 1F3FF                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿฟ baby: dark skin tone",
+    "\uD83D\uDC76\uD83C\uDFFF",
+
+    "1F9D2                                      ; fully-qualified     # ๐Ÿง’ child",
+    "\uD83E\uDDD2",
+
+    "1F9D2 1F3FB                                ; fully-qualified     # ๐Ÿง’๐Ÿป child: light skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFB",
+
+    "1F9D2 1F3FC                                ; fully-qualified     # ๐Ÿง’๐Ÿผ child: medium-light skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFC",
+
+    "1F9D2 1F3FD                                ; fully-qualified     # ๐Ÿง’๐Ÿฝ child: medium skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFD",
+
+    "1F9D2 1F3FE                                ; fully-qualified     # ๐Ÿง’๐Ÿพ child: medium-dark skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFE",
+
+    "1F9D2 1F3FF                                ; fully-qualified     # ๐Ÿง’๐Ÿฟ child: dark skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFF",
+
+    "1F466                                      ; fully-qualified     # ๐Ÿ‘ฆ boy",
+    "\uD83D\uDC66",
+
+    "1F466 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿป boy: light skin tone",
+    "\uD83D\uDC66\uD83C\uDFFB",
+
+    "1F466 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿผ boy: medium-light skin tone",
+    "\uD83D\uDC66\uD83C\uDFFC",
+
+    "1F466 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿฝ boy: medium skin tone",
+    "\uD83D\uDC66\uD83C\uDFFD",
+
+    "1F466 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿพ boy: medium-dark skin tone",
+    "\uD83D\uDC66\uD83C\uDFFE",
+
+    "1F466 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿฟ boy: dark skin tone",
+    "\uD83D\uDC66\uD83C\uDFFF",
+
+    "1F467                                      ; fully-qualified     # ๐Ÿ‘ง girl",
+    "\uD83D\uDC67",
+
+    "1F467 1F3FB                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿป girl: light skin tone",
+    "\uD83D\uDC67\uD83C\uDFFB",
+
+    "1F467 1F3FC                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿผ girl: medium-light skin tone",
+    "\uD83D\uDC67\uD83C\uDFFC",
+
+    "1F467 1F3FD                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿฝ girl: medium skin tone",
+    "\uD83D\uDC67\uD83C\uDFFD",
+
+    "1F467 1F3FE                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿพ girl: medium-dark skin tone",
+    "\uD83D\uDC67\uD83C\uDFFE",
+
+    "1F467 1F3FF                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿฟ girl: dark skin tone",
+    "\uD83D\uDC67\uD83C\uDFFF",
+
+    "1F9D1                                      ; fully-qualified     # ๐Ÿง‘ adult",
+    "\uD83E\uDDD1",
+
+    "1F9D1 1F3FB                                ; fully-qualified     # ๐Ÿง‘๐Ÿป adult: light skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFB",
+
+    "1F9D1 1F3FC                                ; fully-qualified     # ๐Ÿง‘๐Ÿผ adult: medium-light skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFC",
+
+    "1F9D1 1F3FD                                ; fully-qualified     # ๐Ÿง‘๐Ÿฝ adult: medium skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFD",
+
+    "1F9D1 1F3FE                                ; fully-qualified     # ๐Ÿง‘๐Ÿพ adult: medium-dark skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFE",
+
+    "1F9D1 1F3FF                                ; fully-qualified     # ๐Ÿง‘๐Ÿฟ adult: dark skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFF",
+
+    "1F468                                      ; fully-qualified     # ๐Ÿ‘จ man",
+    "\uD83D\uDC68",
+
+    "1F468 1F3FB                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿป man: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB",
+
+    "1F468 1F3FC                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿผ man: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC",
+
+    "1F468 1F3FD                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝ man: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD",
+
+    "1F468 1F3FE                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿพ man: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE",
+
+    "1F468 1F3FF                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟ man: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF",
+
+    "1F469                                      ; fully-qualified     # ๐Ÿ‘ฉ woman",
+    "\uD83D\uDC69",
+
+    "1F469 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿป woman: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB",
+
+    "1F469 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผ woman: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC",
+
+    "1F469 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝ woman: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD",
+
+    "1F469 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพ woman: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE",
+
+    "1F469 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟ woman: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF",
+
+    "1F9D3                                      ; fully-qualified     # ๐Ÿง“ older adult",
+    "\uD83E\uDDD3",
+
+    "1F9D3 1F3FB                                ; fully-qualified     # ๐Ÿง“๐Ÿป older adult: light skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFB",
+
+    "1F9D3 1F3FC                                ; fully-qualified     # ๐Ÿง“๐Ÿผ older adult: medium-light skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFC",
+
+    "1F9D3 1F3FD                                ; fully-qualified     # ๐Ÿง“๐Ÿฝ older adult: medium skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFD",
+
+    "1F9D3 1F3FE                                ; fully-qualified     # ๐Ÿง“๐Ÿพ older adult: medium-dark skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFE",
+
+    "1F9D3 1F3FF                                ; fully-qualified     # ๐Ÿง“๐Ÿฟ older adult: dark skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFF",
+
+    "1F474                                      ; fully-qualified     # ๐Ÿ‘ด old man",
+    "\uD83D\uDC74",
+
+    "1F474 1F3FB                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿป old man: light skin tone",
+    "\uD83D\uDC74\uD83C\uDFFB",
+
+    "1F474 1F3FC                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿผ old man: medium-light skin tone",
+    "\uD83D\uDC74\uD83C\uDFFC",
+
+    "1F474 1F3FD                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿฝ old man: medium skin tone",
+    "\uD83D\uDC74\uD83C\uDFFD",
+
+    "1F474 1F3FE                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿพ old man: medium-dark skin tone",
+    "\uD83D\uDC74\uD83C\uDFFE",
+
+    "1F474 1F3FF                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿฟ old man: dark skin tone",
+    "\uD83D\uDC74\uD83C\uDFFF",
+
+    "1F475                                      ; fully-qualified     # ๐Ÿ‘ต old woman",
+    "\uD83D\uDC75",
+
+    "1F475 1F3FB                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿป old woman: light skin tone",
+    "\uD83D\uDC75\uD83C\uDFFB",
+
+    "1F475 1F3FC                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿผ old woman: medium-light skin tone",
+    "\uD83D\uDC75\uD83C\uDFFC",
+
+    "1F475 1F3FD                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿฝ old woman: medium skin tone",
+    "\uD83D\uDC75\uD83C\uDFFD",
+
+    "1F475 1F3FE                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿพ old woman: medium-dark skin tone",
+    "\uD83D\uDC75\uD83C\uDFFE",
+
+    "1F475 1F3FF                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿฟ old woman: dark skin tone",
+    "\uD83D\uDC75\uD83C\uDFFF",
+
+    "1F468 200D 2695 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โš•๏ธ man health worker",
+    "\uD83D\uDC68\u200D\u2695\uFE0F",
+
+    "1F468 200D 2695                            ; non-fully-qualified # ๐Ÿ‘จโ€โš• man health worker",
+    "\uD83D\uDC68\u200D\u2695",
+
+    "1F468 1F3FB 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โš•๏ธ man health worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2695\uFE0F",
+
+    "1F468 1F3FB 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โš• man health worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2695",
+
+    "1F468 1F3FC 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โš•๏ธ man health worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695\uFE0F",
+
+    "1F468 1F3FC 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โš• man health worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695",
+
+    "1F468 1F3FD 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โš•๏ธ man health worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2695\uFE0F",
+
+    "1F468 1F3FD 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โš• man health worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2695",
+
+    "1F468 1F3FE 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โš•๏ธ man health worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2695\uFE0F",
+
+    "1F468 1F3FE 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โš• man health worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2695",
+
+    "1F468 1F3FF 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โš•๏ธ man health worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2695\uFE0F",
+
+    "1F468 1F3FF 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โš• man health worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2695",
+
+    "1F469 200D 2695 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โš•๏ธ woman health worker",
+    "\uD83D\uDC69\u200D\u2695\uFE0F",
+
+    "1F469 200D 2695                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โš• woman health worker",
+    "\uD83D\uDC69\u200D\u2695",
+
+    "1F469 1F3FB 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โš•๏ธ woman health worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2695\uFE0F",
+
+    "1F469 1F3FB 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โš• woman health worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2695",
+
+    "1F469 1F3FC 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โš•๏ธ woman health worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2695\uFE0F",
+
+    "1F469 1F3FC 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โš• woman health worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2695",
+
+    "1F469 1F3FD 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โš•๏ธ woman health worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2695\uFE0F",
+
+    "1F469 1F3FD 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โš• woman health worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2695",
+
+    "1F469 1F3FE 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โš•๏ธ woman health worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2695\uFE0F",
+
+    "1F469 1F3FE 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โš• woman health worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2695",
+
+    "1F469 1F3FF 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โš•๏ธ woman health worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2695\uFE0F",
+
+    "1F469 1F3FF 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โš• woman health worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2695",
+
+    "1F468 200D 1F393                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽ“ man student",
+    "\uD83D\uDC68\u200D\uD83C\uDF93",
+
+    "1F468 1F3FB 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽ“ man student: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF93",
+
+    "1F468 1F3FC 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽ“ man student: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF93",
+
+    "1F468 1F3FD 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽ“ man student: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF93",
+
+    "1F468 1F3FE 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽ“ man student: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF93",
+
+    "1F468 1F3FF 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽ“ man student: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF93",
+
+    "1F469 200D 1F393                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽ“ woman student",
+    "\uD83D\uDC69\u200D\uD83C\uDF93",
+
+    "1F469 1F3FB 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽ“ woman student: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF93",
+
+    "1F469 1F3FC 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽ“ woman student: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF93",
+
+    "1F469 1F3FD 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽ“ woman student: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF93",
+
+    "1F469 1F3FE 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽ“ woman student: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF93",
+
+    "1F469 1F3FF 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽ“ woman student: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF93",
+
+    "1F468 200D 1F3EB                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿซ man teacher",
+    "\uD83D\uDC68\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FB 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿซ man teacher: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FC 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿซ man teacher: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FD 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿซ man teacher: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FE 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿซ man teacher: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FF 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿซ man teacher: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFEB",
+
+    "1F469 200D 1F3EB                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿซ woman teacher",
+    "\uD83D\uDC69\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FB 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿซ woman teacher: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FC 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿซ woman teacher: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FD 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿซ woman teacher: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FE 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿซ woman teacher: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FF 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿซ woman teacher: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFEB",
+
+    "1F468 200D 2696 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โš–๏ธ man judge",
+    "\uD83D\uDC68\u200D\u2696\uFE0F",
+
+    "1F468 200D 2696                            ; non-fully-qualified # ๐Ÿ‘จโ€โš– man judge",
+    "\uD83D\uDC68\u200D\u2696",
+
+    "1F468 1F3FB 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โš–๏ธ man judge: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2696\uFE0F",
+
+    "1F468 1F3FB 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โš– man judge: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2696",
+
+    "1F468 1F3FC 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โš–๏ธ man judge: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2696\uFE0F",
+
+    "1F468 1F3FC 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โš– man judge: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2696",
+
+    "1F468 1F3FD 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โš–๏ธ man judge: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2696\uFE0F",
+
+    "1F468 1F3FD 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โš– man judge: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2696",
+
+    "1F468 1F3FE 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โš–๏ธ man judge: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2696\uFE0F",
+
+    "1F468 1F3FE 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โš– man judge: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2696",
+
+    "1F468 1F3FF 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โš–๏ธ man judge: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2696\uFE0F",
+
+    "1F468 1F3FF 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โš– man judge: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2696",
+
+    "1F469 200D 2696 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โš–๏ธ woman judge",
+    "\uD83D\uDC69\u200D\u2696\uFE0F",
+
+    "1F469 200D 2696                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โš– woman judge",
+    "\uD83D\uDC69\u200D\u2696",
+
+    "1F469 1F3FB 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โš–๏ธ woman judge: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2696\uFE0F",
+
+    "1F469 1F3FB 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โš– woman judge: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2696",
+
+    "1F469 1F3FC 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โš–๏ธ woman judge: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2696\uFE0F",
+
+    "1F469 1F3FC 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โš– woman judge: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2696",
+
+    "1F469 1F3FD 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โš–๏ธ woman judge: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2696\uFE0F",
+
+    "1F469 1F3FD 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โš– woman judge: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2696",
+
+    "1F469 1F3FE 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โš–๏ธ woman judge: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2696\uFE0F",
+
+    "1F469 1F3FE 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โš– woman judge: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2696",
+
+    "1F469 1F3FF 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โš–๏ธ woman judge: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2696\uFE0F",
+
+    "1F469 1F3FF 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โš– woman judge: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2696",
+
+    "1F468 200D 1F33E                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŒพ man farmer",
+    "\uD83D\uDC68\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FB 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŒพ man farmer: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FC 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŒพ man farmer: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FD 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŒพ man farmer: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FE 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŒพ man farmer: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FF 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŒพ man farmer: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF3E",
+
+    "1F469 200D 1F33E                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŒพ woman farmer",
+    "\uD83D\uDC69\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FB 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŒพ woman farmer: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FC 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŒพ woman farmer: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FD 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŒพ woman farmer: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FE 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŒพ woman farmer: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FF 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŒพ woman farmer: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF3E",
+
+    "1F468 200D 1F373                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿณ man cook",
+    "\uD83D\uDC68\u200D\uD83C\uDF73",
+
+    "1F468 1F3FB 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿณ man cook: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF73",
+
+    "1F468 1F3FC 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿณ man cook: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF73",
+
+    "1F468 1F3FD 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿณ man cook: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF73",
+
+    "1F468 1F3FE 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿณ man cook: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF73",
+
+    "1F468 1F3FF 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿณ man cook: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF73",
+
+    "1F469 200D 1F373                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿณ woman cook",
+    "\uD83D\uDC69\u200D\uD83C\uDF73",
+
+    "1F469 1F3FB 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿณ woman cook: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF73",
+
+    "1F469 1F3FC 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿณ woman cook: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF73",
+
+    "1F469 1F3FD 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿณ woman cook: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF73",
+
+    "1F469 1F3FE 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿณ woman cook: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF73",
+
+    "1F469 1F3FF 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿณ woman cook: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF73",
+
+    "1F468 200D 1F527                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ”ง man mechanic",
+    "\uD83D\uDC68\u200D\uD83D\uDD27",
+
+    "1F468 1F3FB 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ”ง man mechanic: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDD27",
+
+    "1F468 1F3FC 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ”ง man mechanic: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDD27",
+
+    "1F468 1F3FD 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ”ง man mechanic: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDD27",
+
+    "1F468 1F3FE 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ”ง man mechanic: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDD27",
+
+    "1F468 1F3FF 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ”ง man mechanic: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDD27",
+
+    "1F469 200D 1F527                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ”ง woman mechanic",
+    "\uD83D\uDC69\u200D\uD83D\uDD27",
+
+    "1F469 1F3FB 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ”ง woman mechanic: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDD27",
+
+    "1F469 1F3FC 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ”ง woman mechanic: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDD27",
+
+    "1F469 1F3FD 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ”ง woman mechanic: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDD27",
+
+    "1F469 1F3FE 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ”ง woman mechanic: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDD27",
+
+    "1F469 1F3FF 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ”ง woman mechanic: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDD27",
+
+    "1F468 200D 1F3ED                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿญ man factory worker",
+    "\uD83D\uDC68\u200D\uD83C\uDFED",
+
+    "1F468 1F3FB 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿญ man factory worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFED",
+
+    "1F468 1F3FC 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿญ man factory worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFED",
+
+    "1F468 1F3FD 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿญ man factory worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFED",
+
+    "1F468 1F3FE 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿญ man factory worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFED",
+
+    "1F468 1F3FF 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿญ man factory worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFED",
+
+    "1F469 200D 1F3ED                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿญ woman factory worker",
+    "\uD83D\uDC69\u200D\uD83C\uDFED",
+
+    "1F469 1F3FB 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿญ woman factory worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFED",
+
+    "1F469 1F3FC 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿญ woman factory worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFED",
+
+    "1F469 1F3FD 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿญ woman factory worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFED",
+
+    "1F469 1F3FE 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿญ woman factory worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFED",
+
+    "1F469 1F3FF 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿญ woman factory worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFED",
+
+    "1F468 200D 1F4BC                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ’ผ man office worker",
+    "\uD83D\uDC68\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FB 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ’ผ man office worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FC 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ’ผ man office worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FD 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ’ผ man office worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FE 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ’ผ man office worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FF 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ’ผ man office worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDCBC",
+
+    "1F469 200D 1F4BC                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ’ผ woman office worker",
+    "\uD83D\uDC69\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FB 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ’ผ woman office worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FC 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ’ผ woman office worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FD 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ผ woman office worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FE 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ’ผ woman office worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FF 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ’ผ woman office worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDCBC",
+
+    "1F468 200D 1F52C                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ”ฌ man scientist",
+    "\uD83D\uDC68\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FB 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ”ฌ man scientist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FC 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ”ฌ man scientist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FD 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ”ฌ man scientist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FE 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ”ฌ man scientist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FF 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ”ฌ man scientist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDD2C",
+
+    "1F469 200D 1F52C                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ”ฌ woman scientist",
+    "\uD83D\uDC69\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FB 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ”ฌ woman scientist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FC 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ”ฌ woman scientist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FD 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ”ฌ woman scientist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FE 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ”ฌ woman scientist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FF 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ”ฌ woman scientist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDD2C",
+
+    "1F468 200D 1F4BB                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ’ป man technologist",
+    "\uD83D\uDC68\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FB 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ’ป man technologist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FC 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ’ป man technologist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FD 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ’ป man technologist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FE 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ’ป man technologist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FF 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ’ป man technologist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDCBB",
+
+    "1F469 200D 1F4BB                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ’ป woman technologist",
+    "\uD83D\uDC69\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FB 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ’ป woman technologist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FC 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ’ป woman technologist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FD 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป woman technologist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FE 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ’ป woman technologist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FF 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ’ป woman technologist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDCBB",
+
+    "1F468 200D 1F3A4                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽค man singer",
+    "\uD83D\uDC68\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FB 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽค man singer: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FC 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽค man singer: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FD 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽค man singer: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FE 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽค man singer: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FF 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽค man singer: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFA4",
+
+    "1F469 200D 1F3A4                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽค woman singer",
+    "\uD83D\uDC69\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FB 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽค woman singer: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FC 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽค woman singer: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FD 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽค woman singer: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FE 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽค woman singer: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FF 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽค woman singer: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFA4",
+
+    "1F468 200D 1F3A8                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽจ man artist",
+    "\uD83D\uDC68\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FB 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽจ man artist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FC 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽจ man artist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FD 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽจ man artist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FE 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽจ man artist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FF 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽจ man artist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFA8",
+
+    "1F469 200D 1F3A8                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽจ woman artist",
+    "\uD83D\uDC69\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FB 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽจ woman artist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FC 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽจ woman artist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FD 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽจ woman artist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FE 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽจ woman artist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FF 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽจ woman artist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFA8",
+
+    "1F468 200D 2708 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โœˆ๏ธ man pilot",
+    "\uD83D\uDC68\u200D\u2708\uFE0F",
+
+    "1F468 200D 2708                            ; non-fully-qualified # ๐Ÿ‘จโ€โœˆ man pilot",
+    "\uD83D\uDC68\u200D\u2708",
+
+    "1F468 1F3FB 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โœˆ๏ธ man pilot: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2708\uFE0F",
+
+    "1F468 1F3FB 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โœˆ man pilot: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2708",
+
+    "1F468 1F3FC 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โœˆ๏ธ man pilot: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2708\uFE0F",
+
+    "1F468 1F3FC 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โœˆ man pilot: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2708",
+
+    "1F468 1F3FD 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โœˆ๏ธ man pilot: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2708\uFE0F",
+
+    "1F468 1F3FD 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โœˆ man pilot: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2708",
+
+    "1F468 1F3FE 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โœˆ๏ธ man pilot: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2708\uFE0F",
+
+    "1F468 1F3FE 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โœˆ man pilot: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2708",
+
+    "1F468 1F3FF 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โœˆ๏ธ man pilot: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2708\uFE0F",
+
+    "1F468 1F3FF 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โœˆ man pilot: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2708",
+
+    "1F469 200D 2708 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โœˆ๏ธ woman pilot",
+    "\uD83D\uDC69\u200D\u2708\uFE0F",
+
+    "1F469 200D 2708                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โœˆ woman pilot",
+    "\uD83D\uDC69\u200D\u2708",
+
+    "1F469 1F3FB 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โœˆ๏ธ woman pilot: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2708\uFE0F",
+
+    "1F469 1F3FB 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โœˆ woman pilot: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2708",
+
+    "1F469 1F3FC 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โœˆ๏ธ woman pilot: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2708\uFE0F",
+
+    "1F469 1F3FC 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โœˆ woman pilot: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2708",
+
+    "1F469 1F3FD 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โœˆ๏ธ woman pilot: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2708\uFE0F",
+
+    "1F469 1F3FD 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โœˆ woman pilot: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2708",
+
+    "1F469 1F3FE 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โœˆ๏ธ woman pilot: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2708\uFE0F",
+
+    "1F469 1F3FE 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โœˆ woman pilot: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2708",
+
+    "1F469 1F3FF 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โœˆ๏ธ woman pilot: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2708\uFE0F",
+
+    "1F469 1F3FF 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โœˆ woman pilot: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2708",
+
+    "1F468 200D 1F680                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿš€ man astronaut",
+    "\uD83D\uDC68\u200D\uD83D\uDE80",
+
+    "1F468 1F3FB 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿš€ man astronaut: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDE80",
+
+    "1F468 1F3FC 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿš€ man astronaut: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDE80",
+
+    "1F468 1F3FD 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿš€ man astronaut: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDE80",
+
+    "1F468 1F3FE 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿš€ man astronaut: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDE80",
+
+    "1F468 1F3FF 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿš€ man astronaut: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDE80",
+
+    "1F469 200D 1F680                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿš€ woman astronaut",
+    "\uD83D\uDC69\u200D\uD83D\uDE80",
+
+    "1F469 1F3FB 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿš€ woman astronaut: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDE80",
+
+    "1F469 1F3FC 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿš€ woman astronaut: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDE80",
+
+    "1F469 1F3FD 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿš€ woman astronaut: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDE80",
+
+    "1F469 1F3FE 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿš€ woman astronaut: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDE80",
+
+    "1F469 1F3FF 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿš€ woman astronaut: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDE80",
+
+    "1F468 200D 1F692                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿš’ man firefighter",
+    "\uD83D\uDC68\u200D\uD83D\uDE92",
+
+    "1F468 1F3FB 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿš’ man firefighter: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDE92",
+
+    "1F468 1F3FC 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿš’ man firefighter: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDE92",
+
+    "1F468 1F3FD 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿš’ man firefighter: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDE92",
+
+    "1F468 1F3FE 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿš’ man firefighter: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDE92",
+
+    "1F468 1F3FF 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿš’ man firefighter: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDE92",
+
+    "1F469 200D 1F692                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿš’ woman firefighter",
+    "\uD83D\uDC69\u200D\uD83D\uDE92",
+
+    "1F469 1F3FB 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿš’ woman firefighter: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDE92",
+
+    "1F469 1F3FC 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿš’ woman firefighter: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDE92",
+
+    "1F469 1F3FD 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿš’ woman firefighter: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDE92",
+
+    "1F469 1F3FE 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿš’ woman firefighter: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDE92",
+
+    "1F469 1F3FF 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿš’ woman firefighter: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDE92",
+
+    "1F46E                                      ; fully-qualified     # ๐Ÿ‘ฎ police officer",
+    "\uD83D\uDC6E",
+
+    "1F46E 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿป police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB",
+
+    "1F46E 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผ police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC",
+
+    "1F46E 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝ police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD",
+
+    "1F46E 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพ police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE",
+
+    "1F46E 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟ police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF",
+
+    "1F46E 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ‘ฎโ€โ™‚๏ธ man police officer",
+    "\uD83D\uDC6E\u200D\u2642\uFE0F",
+
+    "1F46E 200D 2642                            ; non-fully-qualified # ๐Ÿ‘ฎโ€โ™‚ man police officer",
+    "\uD83D\uDC6E\u200D\u2642",
+
+    "1F46E 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿปโ€โ™‚๏ธ man police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿปโ€โ™‚ man police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2642",
+
+    "1F46E 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผโ€โ™‚๏ธ man police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿผโ€โ™‚ man police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2642",
+
+    "1F46E 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝโ€โ™‚๏ธ man police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฝโ€โ™‚ man police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2642",
+
+    "1F46E 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพโ€โ™‚๏ธ man police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿพโ€โ™‚ man police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2642",
+
+    "1F46E 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟโ€โ™‚๏ธ man police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฟโ€โ™‚ man police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2642",
+
+    "1F46E 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ‘ฎโ€โ™€๏ธ woman police officer",
+    "\uD83D\uDC6E\u200D\u2640\uFE0F",
+
+    "1F46E 200D 2640                            ; non-fully-qualified # ๐Ÿ‘ฎโ€โ™€ woman police officer",
+    "\uD83D\uDC6E\u200D\u2640",
+
+    "1F46E 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿปโ€โ™€๏ธ woman police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿปโ€โ™€ woman police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2640",
+
+    "1F46E 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผโ€โ™€๏ธ woman police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿผโ€โ™€ woman police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2640",
+
+    "1F46E 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝโ€โ™€๏ธ woman police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฝโ€โ™€ woman police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2640",
+
+    "1F46E 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพโ€โ™€๏ธ woman police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿพโ€โ™€ woman police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2640",
+
+    "1F46E 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ woman police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฟโ€โ™€ woman police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2640",
+
+    "1F575 FE0F                                 ; fully-qualified     # ๐Ÿ•ต๏ธ detective",
+    "\uD83D\uDD75\uFE0F",
+
+    "1F575                                      ; non-fully-qualified # ๐Ÿ•ต detective",
+    "\uD83D\uDD75",
+
+    "1F575 1F3FB                                ; fully-qualified     # ๐Ÿ•ต๐Ÿป detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB",
+
+    "1F575 1F3FC                                ; fully-qualified     # ๐Ÿ•ต๐Ÿผ detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC",
+
+    "1F575 1F3FD                                ; fully-qualified     # ๐Ÿ•ต๐Ÿฝ detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD",
+
+    "1F575 1F3FE                                ; fully-qualified     # ๐Ÿ•ต๐Ÿพ detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE",
+
+    "1F575 1F3FF                                ; fully-qualified     # ๐Ÿ•ต๐Ÿฟ detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF",
+
+    "1F575 FE0F 200D 2642 FE0F                  ; fully-qualified     # ๐Ÿ•ต๏ธโ€โ™‚๏ธ man detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2642\uFE0F",
+
+    "1F575 200D 2642 FE0F                       ; non-fully-qualified # ๐Ÿ•ตโ€โ™‚๏ธ man detective",
+    "\uD83D\uDD75\u200D\u2642\uFE0F",
+
+    "1F575 FE0F 200D 2642                       ; non-fully-qualified # ๐Ÿ•ต๏ธโ€โ™‚ man detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2642",
+
+    "1F575 200D 2642                            ; non-fully-qualified # ๐Ÿ•ตโ€โ™‚ man detective",
+    "\uD83D\uDD75\u200D\u2642",
+
+    "1F575 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿปโ€โ™‚๏ธ man detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F575 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿปโ€โ™‚ man detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2642",
+
+    "1F575 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿผโ€โ™‚๏ธ man detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F575 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿผโ€โ™‚ man detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2642",
+
+    "1F575 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฝโ€โ™‚๏ธ man detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F575 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฝโ€โ™‚ man detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2642",
+
+    "1F575 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿพโ€โ™‚๏ธ man detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F575 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿพโ€โ™‚ man detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2642",
+
+    "1F575 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฟโ€โ™‚๏ธ man detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F575 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฟโ€โ™‚ man detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2642",
+
+    "1F575 FE0F 200D 2640 FE0F                  ; fully-qualified     # ๐Ÿ•ต๏ธโ€โ™€๏ธ woman detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2640\uFE0F",
+
+    "1F575 200D 2640 FE0F                       ; non-fully-qualified # ๐Ÿ•ตโ€โ™€๏ธ woman detective",
+    "\uD83D\uDD75\u200D\u2640\uFE0F",
+
+    "1F575 FE0F 200D 2640                       ; non-fully-qualified # ๐Ÿ•ต๏ธโ€โ™€ woman detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2640",
+
+    "1F575 200D 2640                            ; non-fully-qualified # ๐Ÿ•ตโ€โ™€ woman detective",
+    "\uD83D\uDD75\u200D\u2640",
+
+    "1F575 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿปโ€โ™€๏ธ woman detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F575 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿปโ€โ™€ woman detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2640",
+
+    "1F575 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿผโ€โ™€๏ธ woman detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F575 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿผโ€โ™€ woman detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2640",
+
+    "1F575 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฝโ€โ™€๏ธ woman detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F575 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฝโ€โ™€ woman detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2640",
+
+    "1F575 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿพโ€โ™€๏ธ woman detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F575 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿพโ€โ™€ woman detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2640",
+
+    "1F575 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฟโ€โ™€๏ธ woman detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F575 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฟโ€โ™€ woman detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2640",
+
+    "1F482                                      ; fully-qualified     # ๐Ÿ’‚ guard",
+    "\uD83D\uDC82",
+
+    "1F482 1F3FB                                ; fully-qualified     # ๐Ÿ’‚๐Ÿป guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB",
+
+    "1F482 1F3FC                                ; fully-qualified     # ๐Ÿ’‚๐Ÿผ guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC",
+
+    "1F482 1F3FD                                ; fully-qualified     # ๐Ÿ’‚๐Ÿฝ guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD",
+
+    "1F482 1F3FE                                ; fully-qualified     # ๐Ÿ’‚๐Ÿพ guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE",
+
+    "1F482 1F3FF                                ; fully-qualified     # ๐Ÿ’‚๐Ÿฟ guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF",
+
+    "1F482 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ’‚โ€โ™‚๏ธ man guard",
+    "\uD83D\uDC82\u200D\u2642\uFE0F",
+
+    "1F482 200D 2642                            ; non-fully-qualified # ๐Ÿ’‚โ€โ™‚ man guard",
+    "\uD83D\uDC82\u200D\u2642",
+
+    "1F482 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿปโ€โ™‚๏ธ man guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F482 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿปโ€โ™‚ man guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2642",
+
+    "1F482 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿผโ€โ™‚๏ธ man guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F482 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿผโ€โ™‚ man guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2642",
+
+    "1F482 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฝโ€โ™‚๏ธ man guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F482 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฝโ€โ™‚ man guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2642",
+
+    "1F482 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿพโ€โ™‚๏ธ man guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F482 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿพโ€โ™‚ man guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2642",
+
+    "1F482 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฟโ€โ™‚๏ธ man guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F482 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฟโ€โ™‚ man guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2642",
+
+    "1F482 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ’‚โ€โ™€๏ธ woman guard",
+    "\uD83D\uDC82\u200D\u2640\uFE0F",
+
+    "1F482 200D 2640                            ; non-fully-qualified # ๐Ÿ’‚โ€โ™€ woman guard",
+    "\uD83D\uDC82\u200D\u2640",
+
+    "1F482 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿปโ€โ™€๏ธ woman guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F482 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿปโ€โ™€ woman guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2640",
+
+    "1F482 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿผโ€โ™€๏ธ woman guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F482 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿผโ€โ™€ woman guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2640",
+
+    "1F482 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฝโ€โ™€๏ธ woman guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F482 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฝโ€โ™€ woman guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2640",
+
+    "1F482 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿพโ€โ™€๏ธ woman guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F482 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿพโ€โ™€ woman guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2640",
+
+    "1F482 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฟโ€โ™€๏ธ woman guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F482 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฟโ€โ™€ woman guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2640",
+
+    "1F477                                      ; fully-qualified     # ๐Ÿ‘ท construction worker",
+    "\uD83D\uDC77",
+
+    "1F477 1F3FB                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿป construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB",
+
+    "1F477 1F3FC                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿผ construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC",
+
+    "1F477 1F3FD                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝ construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD",
+
+    "1F477 1F3FE                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿพ construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE",
+
+    "1F477 1F3FF                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟ construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF",
+
+    "1F477 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ‘ทโ€โ™‚๏ธ man construction worker",
+    "\uD83D\uDC77\u200D\u2642\uFE0F",
+
+    "1F477 200D 2642                            ; non-fully-qualified # ๐Ÿ‘ทโ€โ™‚ man construction worker",
+    "\uD83D\uDC77\u200D\u2642",
+
+    "1F477 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿปโ€โ™‚๏ธ man construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F477 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿปโ€โ™‚ man construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2642",
+
+    "1F477 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿผโ€โ™‚๏ธ man construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F477 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿผโ€โ™‚ man construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2642",
+
+    "1F477 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝโ€โ™‚๏ธ man construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F477 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฝโ€โ™‚ man construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2642",
+
+    "1F477 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿพโ€โ™‚๏ธ man construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F477 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿพโ€โ™‚ man construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2642",
+
+    "1F477 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟโ€โ™‚๏ธ man construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F477 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฟโ€โ™‚ man construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2642",
+
+    "1F477 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ‘ทโ€โ™€๏ธ woman construction worker",
+    "\uD83D\uDC77\u200D\u2640\uFE0F",
+
+    "1F477 200D 2640                            ; non-fully-qualified # ๐Ÿ‘ทโ€โ™€ woman construction worker",
+    "\uD83D\uDC77\u200D\u2640",
+
+    "1F477 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿปโ€โ™€๏ธ woman construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F477 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿปโ€โ™€ woman construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2640",
+
+    "1F477 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿผโ€โ™€๏ธ woman construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F477 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿผโ€โ™€ woman construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2640",
+
+    "1F477 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝโ€โ™€๏ธ woman construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F477 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฝโ€โ™€ woman construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2640",
+
+    "1F477 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿพโ€โ™€๏ธ woman construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F477 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿพโ€โ™€ woman construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2640",
+
+    "1F477 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟโ€โ™€๏ธ woman construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F477 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฟโ€โ™€ woman construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2640",
+
+    "1F934                                      ; fully-qualified     # ๐Ÿคด prince",
+    "\uD83E\uDD34",
+
+    "1F934 1F3FB                                ; fully-qualified     # ๐Ÿคด๐Ÿป prince: light skin tone",
+    "\uD83E\uDD34\uD83C\uDFFB",
+
+    "1F934 1F3FC                                ; fully-qualified     # ๐Ÿคด๐Ÿผ prince: medium-light skin tone",
+    "\uD83E\uDD34\uD83C\uDFFC",
+
+    "1F934 1F3FD                                ; fully-qualified     # ๐Ÿคด๐Ÿฝ prince: medium skin tone",
+    "\uD83E\uDD34\uD83C\uDFFD",
+
+    "1F934 1F3FE                                ; fully-qualified     # ๐Ÿคด๐Ÿพ prince: medium-dark skin tone",
+    "\uD83E\uDD34\uD83C\uDFFE",
+
+    "1F934 1F3FF                                ; fully-qualified     # ๐Ÿคด๐Ÿฟ prince: dark skin tone",
+    "\uD83E\uDD34\uD83C\uDFFF",
+
+    "1F478                                      ; fully-qualified     # ๐Ÿ‘ธ princess",
+    "\uD83D\uDC78",
+
+    "1F478 1F3FB                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿป princess: light skin tone",
+    "\uD83D\uDC78\uD83C\uDFFB",
+
+    "1F478 1F3FC                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿผ princess: medium-light skin tone",
+    "\uD83D\uDC78\uD83C\uDFFC",
+
+    "1F478 1F3FD                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿฝ princess: medium skin tone",
+    "\uD83D\uDC78\uD83C\uDFFD",
+
+    "1F478 1F3FE                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿพ princess: medium-dark skin tone",
+    "\uD83D\uDC78\uD83C\uDFFE",
+
+    "1F478 1F3FF                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿฟ princess: dark skin tone",
+    "\uD83D\uDC78\uD83C\uDFFF",
+
+    "1F473                                      ; fully-qualified     # ๐Ÿ‘ณ person wearing turban",
+    "\uD83D\uDC73",
+
+    "1F473 1F3FB                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿป person wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB",
+
+    "1F473 1F3FC                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿผ person wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC",
+
+    "1F473 1F3FD                                ; fully-qualified     # 👳🏽 person wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD",
+
+    "1F473 1F3FE                                ; fully-qualified     # 👳🏾 person wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE",
+
+    "1F473 1F3FF                                ; fully-qualified     # 👳🏿 person wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF",
+
+    "1F473 200D 2642 FE0F                       ; fully-qualified     # 👳‍♂️ man wearing turban",
+    "\uD83D\uDC73\u200D\u2642\uFE0F",
+
+    "1F473 200D 2642                            ; non-fully-qualified # 👳‍♂ man wearing turban",
+    "\uD83D\uDC73\u200D\u2642",
+
+    "1F473 1F3FB 200D 2642 FE0F                 ; fully-qualified     # 👳🏻‍♂️ man wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F473 1F3FB 200D 2642                      ; non-fully-qualified # 👳🏻‍♂ man wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2642",
+
+    "1F473 1F3FC 200D 2642 FE0F                 ; fully-qualified     # 👳🏼‍♂️ man wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F473 1F3FC 200D 2642                      ; non-fully-qualified # 👳🏼‍♂ man wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2642",
+
+    "1F473 1F3FD 200D 2642 FE0F                 ; fully-qualified     # 👳🏽‍♂️ man wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F473 1F3FD 200D 2642                      ; non-fully-qualified # 👳🏽‍♂ man wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2642",
+
+    "1F473 1F3FE 200D 2642 FE0F                 ; fully-qualified     # 👳🏾‍♂️ man wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F473 1F3FE 200D 2642                      ; non-fully-qualified # 👳🏾‍♂ man wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2642",
+
+    "1F473 1F3FF 200D 2642 FE0F                 ; fully-qualified     # 👳🏿‍♂️ man wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F473 1F3FF 200D 2642                      ; non-fully-qualified # 👳🏿‍♂ man wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2642",
+
+    "1F473 200D 2640 FE0F                       ; fully-qualified     # 👳‍♀️ woman wearing turban",
+    "\uD83D\uDC73\u200D\u2640\uFE0F",
+
+    "1F473 200D 2640                            ; non-fully-qualified # 👳‍♀ woman wearing turban",
+    "\uD83D\uDC73\u200D\u2640",
+
+    "1F473 1F3FB 200D 2640 FE0F                 ; fully-qualified     # 👳🏻‍♀️ woman wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F473 1F3FB 200D 2640                      ; non-fully-qualified # 👳🏻‍♀ woman wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2640",
+
+    "1F473 1F3FC 200D 2640 FE0F                 ; fully-qualified     # 👳🏼‍♀️ woman wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F473 1F3FC 200D 2640                      ; non-fully-qualified # 👳🏼‍♀ woman wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2640",
+
+    "1F473 1F3FD 200D 2640 FE0F                 ; fully-qualified     # 👳🏽‍♀️ woman wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F473 1F3FD 200D 2640                      ; non-fully-qualified # 👳🏽‍♀ woman wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2640",
+
+    "1F473 1F3FE 200D 2640 FE0F                 ; fully-qualified     # 👳🏾‍♀️ woman wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F473 1F3FE 200D 2640                      ; non-fully-qualified # 👳🏾‍♀ woman wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2640",
+
+    "1F473 1F3FF 200D 2640 FE0F                 ; fully-qualified     # 👳🏿‍♀️ woman wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F473 1F3FF 200D 2640                      ; non-fully-qualified # 👳🏿‍♀ woman wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2640",
+
+    "1F472                                      ; fully-qualified     # 👲 man with Chinese cap",
+    "\uD83D\uDC72",
+
+    "1F472 1F3FB                                ; fully-qualified     # 👲🏻 man with Chinese cap: light skin tone",
+    "\uD83D\uDC72\uD83C\uDFFB",
+
+    "1F472 1F3FC                                ; fully-qualified     # 👲🏼 man with Chinese cap: medium-light skin tone",
+    "\uD83D\uDC72\uD83C\uDFFC",
+
+    "1F472 1F3FD                                ; fully-qualified     # 👲🏽 man with Chinese cap: medium skin tone",
+    "\uD83D\uDC72\uD83C\uDFFD",
+
+    "1F472 1F3FE                                ; fully-qualified     # 👲🏾 man with Chinese cap: medium-dark skin tone",
+    "\uD83D\uDC72\uD83C\uDFFE",
+
+    "1F472 1F3FF                                ; fully-qualified     # 👲🏿 man with Chinese cap: dark skin tone",
+    "\uD83D\uDC72\uD83C\uDFFF",
+
+    "1F9D5                                      ; fully-qualified     # 🧕 woman with headscarf",
+    "\uD83E\uDDD5",
+
+    "1F9D5 1F3FB                                ; fully-qualified     # 🧕🏻 woman with headscarf: light skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFB",
+
+    "1F9D5 1F3FC                                ; fully-qualified     # 🧕🏼 woman with headscarf: medium-light skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFC",
+
+    "1F9D5 1F3FD                                ; fully-qualified     # 🧕🏽 woman with headscarf: medium skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFD",
+
+    "1F9D5 1F3FE                                ; fully-qualified     # 🧕🏾 woman with headscarf: medium-dark skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFE",
+
+    "1F9D5 1F3FF                                ; fully-qualified     # 🧕🏿 woman with headscarf: dark skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFF",
+
+    "1F9D4                                      ; fully-qualified     # 🧔 bearded person",
+    "\uD83E\uDDD4",
+
+    "1F9D4 1F3FB                                ; fully-qualified     # ๐Ÿง”๐Ÿป bearded person: light skin t

<TRUNCATED>

[16/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0e903cab
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0e903cab
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0e903cab

Branch: refs/heads/branch_8x
Commit: 0e903cab47e98c75d4fe0bb2a33a84e8f3c648ff
Parents: 5a60c3e
Author: Steve Rowe <sa...@apache.org>
Authored: Tue Jan 8 13:33:49 2019 -0500
Committer: Steve Rowe <sa...@apache.org>
Committed: Tue Jan 8 13:34:18 2019 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |     8 +
 lucene/analysis/common/build.xml                |    32 +-
 .../charfilter/HTMLStripCharFilter.java         |   834 +-
 .../charfilter/HTMLStripCharFilter.jflex        |    22 +-
 .../analysis/standard/ClassicTokenizerImpl.java |   141 +-
 .../standard/UAX29URLEmailTokenizer.java        |    44 +-
 .../standard/UAX29URLEmailTokenizerImpl.java    | 74906 +++++++++--------
 .../standard/UAX29URLEmailTokenizerImpl.jflex   |   216 +-
 .../wikipedia/WikipediaTokenizerImpl.java       |   465 +-
 .../charfilter/HTMLStripCharFilterTest.java     |     2 +-
 .../standard/TestUAX29URLEmailAnalyzer.java     |     4 +-
 .../standard/TestUAX29URLEmailTokenizer.java    |    76 +-
 lucene/common-build.xml                         |    21 +-
 .../src/data/jflex/UnicodeEmojiProperties.jflex |    25 +
 .../src/data/jflex/getUnicodeEmojiProperties.pl |   168 +
 lucene/core/src/data/jflex/skeleton.default     |   342 +
 .../jflex/skeleton.disable.buffer.expansion.txt |   348 +
 .../standard/StandardTokenizerImpl.java         |   637 +-
 .../standard/StandardTokenizerImpl.jflex        |   206 +-
 .../analysis/standard/TestStandardAnalyzer.java |   131 +-
 .../EmojiTokenizationTestUnicode_11_0.java      | 10756 +++
 .../standard/WordBreakTestUnicode_6_3_0.java    |  5537 --
 .../standard/WordBreakTestUnicode_9_0_0.java    |  8276 ++
 .../standard/generateEmojiTokenizationTest.pl   |   150 +
 .../generateJavaUnicodeWordBreakTest.pl         |    41 +-
 25 files changed, 62395 insertions(+), 40993 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4f677e7..3a186bc 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -238,6 +238,11 @@ Optimizations
 
 ======================= Lucene 7.7.0 =======================
 
+Changes in Runtime Behavior
+
+* LUCENE-8527: StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0,
+  and provide Unicode UTS#51 v11.0 Emoji tokenization with the "<EMOJI>" token type. 
+
 Build
 
 * LUCENE-8611: Update randomizedtesting to 2.7.2, JUnit to 4.12, add hamcrest-core 
@@ -290,6 +295,9 @@ Improvements
 
 * LUCENE-8581: Change LatLonShape encoding to use 4 bytes Per Dimension.
   (Ignacio Vera, Nick Knize, Adrien Grand)
+  
+* LUCENE-8527: Upgrade JFlex dependency to 1.7.0; in StandardTokenizer and UAX29URLEmailTokenizer,
+  increase supported Unicode version from 6.3 to 9.0, and support Unicode UTS#51 v11.0 Emoji tokenization.
 
 Optimizations
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/build.xml b/lucene/analysis/common/build.xml
index b8eb37a..f752ecc 100644
--- a/lucene/analysis/common/build.xml
+++ b/lucene/analysis/common/build.xml
@@ -33,18 +33,14 @@
 
   <property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
 
-  <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-ClassicAnalyzer,-jflex-UAX29URLEmailTokenizer,
-                                -jflex-wiki-tokenizer,-jflex-HTMLStripCharFilter"/>
-
-  <target name="-jflex-HTMLStripCharFilter"
-          depends="init,generate-jflex-html-char-entities">
-    <jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex"
-           outdir="src/java/org/apache/lucene/analysis/charfilter"
-           nobak="on" inputstreamctor="false"/>
-    <!-- Remove the inappropriate JFlex-generated constructor -->
-    <replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
-                   match="/\*\*\s*\*\s*Creates a new scanner\s*\*\s*\*\s*@param\s*in\s*the java.io.Reader to read input from\.\s*\*/\s*public HTMLStripCharFilter\(java\.io\.Reader in\)\s*\{\s*this.zzReader = in;\s*\}"
-                   replace="" flags="s"/>
+  <!-- Because of a bug in JFlex's ant task, HTMLStripCharFilter has to be generated last.   -->
+  <!-- Otherwise the "%apiprivate" option used in its specification will leak into following -->
+  <!-- ant task invocations.                                                                 -->
+  <target name="jflex" depends="init,clean-jflex,-jflex-wiki-tokenizer,-jflex-ClassicAnalyzer,
+                                -jflex-UAX29URLEmailTokenizer,-jflex-HTMLStripCharFilter"/>
+
+  <target name="-jflex-HTMLStripCharFilter" depends="-install-jflex,generate-jflex-html-char-entities">
+    <run-jflex dir="src/java/org/apache/lucene/analysis/charfilter" name="HTMLStripCharFilter"/>
   </target>
 
   <target name="generate-jflex-html-char-entities">
@@ -58,17 +54,17 @@
     <fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>
   </target>
 
-  <target name="-jflex-wiki-tokenizer" depends="init,-install-jflex">
+  <target name="-jflex-wiki-tokenizer" depends="-install-jflex">
     <run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-UAX29URLEmailTokenizer" depends="init,-install-jflex">
-    <run-jflex-and-disable-buffer-expansion
-        dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
+  <target name="-jflex-ClassicAnalyzer" depends="-install-jflex">
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-ClassicAnalyzer" depends="init,-install-jflex">
-    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
+  <target name="-jflex-UAX29URLEmailTokenizer" depends="-install-jflex">
+    <run-jflex-and-disable-buffer-expansion
+        dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
   </target>
 
   <target name="clean-jflex">

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index a236497..ae67bde 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -95,127 +95,152 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     "\32\0\1\41\11\0\1\1\12\0\1\1\1\0\1\2\2\0\1\1"+
     "\5\0\27\1\1\0\37\1\1\0\u01ca\1\4\0\14\1\16\0\5\1"+
     "\7\0\1\1\1\0\1\1\21\0\160\2\5\1\1\0\2\1\2\0"+
-    "\4\1\10\0\1\1\1\2\3\1\1\0\1\1\1\0\24\1\1\0"+
-    "\123\1\1\0\213\1\1\0\5\2\2\0\236\1\11\0\46\1\2\0"+
-    "\1\1\7\0\47\1\11\0\55\2\1\0\1\2\1\0\2\2\1\0"+
-    "\2\2\1\0\1\2\10\0\33\1\5\0\3\1\35\0\13\2\5\0"+
-    "\53\1\25\2\12\111\4\0\2\1\1\2\143\1\1\0\1\1\7\2"+
-    "\2\0\6\2\2\1\2\2\1\0\4\2\2\1\12\111\3\1\2\0"+
-    "\1\1\20\0\1\1\1\2\36\1\33\2\2\0\131\1\13\2\1\1"+
-    "\16\0\12\111\41\1\11\2\2\1\4\0\1\1\5\0\26\1\4\2"+
-    "\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\104\0"+
-    "\1\1\1\0\13\1\67\0\33\2\1\0\4\2\66\1\3\2\1\1"+
-    "\22\2\1\1\7\2\12\1\2\2\2\0\12\111\1\0\7\1\1\0"+
-    "\7\1\1\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+
-    "\7\1\1\0\1\1\3\0\4\1\2\0\1\2\1\1\7\2\2\0"+
-    "\2\2\2\0\3\2\1\1\10\0\1\2\4\0\2\1\1\0\3\1"+
-    "\2\2\2\0\12\111\2\1\17\0\3\2\1\0\6\1\4\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\2\1\1\0\2\1"+
-    "\2\0\1\2\1\0\5\2\4\0\2\2\2\0\3\2\3\0\1\2"+
-    "\7\0\4\1\1\0\1\1\7\0\12\111\2\2\3\1\1\2\13\0"+
-    "\3\2\1\0\11\1\1\0\3\1\1\0\26\1\1\0\7\1\1\0"+
-    "\2\1\1\0\5\1\2\0\1\2\1\1\10\2\1\0\3\2\1\0"+
-    "\3\2\2\0\1\1\17\0\2\1\2\2\2\0\12\111\21\0\3\2"+
-    "\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1"+
-    "\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2\2\0\3\2"+
-    "\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0\12\111\1\0"+
-    "\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1\1\0\4\1"+
-    "\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1\3\0\3\1"+
-    "\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2\2\0\1\1"+
-    "\6\0\1\2\16\0\12\111\21\0\3\2\1\0\10\1\1\0\3\1"+
-    "\1\0\27\1\1\0\12\1\1\0\5\1\3\0\1\1\7\2\1\0"+
-    "\3\2\1\0\4\2\7\0\2\2\1\0\2\1\6\0\2\1\2\2"+
-    "\2\0\12\111\22\0\2\2\1\0\10\1\1\0\3\1\1\0\27\1"+
+    "\4\1\1\0\1\1\6\0\1\1\1\2\3\1\1\0\1\1\1\0"+
+    "\24\1\1\0\123\1\1\0\213\1\1\0\5\2\2\0\246\1\1\0"+
+    "\46\1\2\0\1\1\7\0\47\1\11\0\55\2\1\0\1\2\1\0"+
+    "\2\2\1\0\2\2\1\0\1\2\10\0\33\1\5\0\3\1\35\0"+
+    "\13\2\5\0\53\1\25\2\12\111\4\0\2\1\1\2\143\1\1\0"+
+    "\1\1\7\2\2\0\6\2\2\1\2\2\1\0\4\2\2\1\12\111"+
+    "\3\1\2\0\1\1\20\0\1\1\1\2\36\1\33\2\2\0\131\1"+
+    "\13\2\1\1\16\0\12\111\41\1\11\2\2\1\4\0\1\1\5\0"+
+    "\26\1\4\2\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1"+
+    "\3\2\104\0\25\1\1\0\10\1\26\0\16\2\1\0\41\2\66\1"+
+    "\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0\12\111\1\0"+
+    "\20\1\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
+    "\1\0\1\1\3\0\4\1\2\0\1\2\1\1\7\2\2\0\2\2"+
+    "\2\0\3\2\1\1\10\0\1\2\4\0\2\1\1\0\3\1\2\2"+
+    "\2\0\12\111\2\1\17\0\3\2\1\0\6\1\4\0\2\1\2\0"+
+    "\26\1\1\0\7\1\1\0\2\1\1\0\2\1\1\0\2\1\2\0"+
+    "\1\2\1\0\5\2\4\0\2\2\2\0\3\2\3\0\1\2\7\0"+
+    "\4\1\1\0\1\1\7\0\12\111\2\2\3\1\1\2\13\0\3\2"+
+    "\1\0\11\1\1\0\3\1\1\0\26\1\1\0\7\1\1\0\2\1"+
+    "\1\0\5\1\2\0\1\2\1\1\10\2\1\0\3\2\1\0\3\2"+
+    "\2\0\1\1\17\0\2\1\2\2\2\0\12\111\11\0\1\1\7\0"+
+    "\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0"+
+    "\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2\2\0"+
+    "\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0\12\111"+
+    "\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1\1\0"+
+    "\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1\3\0"+
+    "\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2\2\0"+
+    "\1\1\6\0\1\2\16\0\12\111\20\0\4\2\1\0\10\1\1\0"+
+    "\3\1\1\0\27\1\1\0\20\1\3\0\1\1\7\2\1\0\3\2"+
+    "\1\0\4\2\7\0\2\2\1\0\3\1\5\0\2\1\2\2\2\0"+
+    "\12\111\20\0\1\1\3\2\1\0\10\1\1\0\3\1\1\0\27\1"+
     "\1\0\12\1\1\0\5\1\2\0\1\2\1\1\7\2\1\0\3\2"+
     "\1\0\4\2\7\0\2\2\7\0\1\1\1\0\2\1\2\2\2\0"+
-    "\12\111\1\0\2\1\17\0\2\2\1\0\10\1\1\0\3\1\1\0"+
-    "\51\1\2\0\1\1\7\2\1\0\3\2\1\0\4\2\1\1\10\0"+
-    "\1\2\10\0\2\1\2\2\2\0\12\111\12\0\6\1\2\0\2\2"+
-    "\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0\7\1"+
-    "\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\22\0\2\2"+
-    "\15\0\60\1\1\2\2\1\7\2\5\0\7\1\10\2\1\0\12\111"+
-    "\47\0\2\1\1\0\1\1\2\0\2\1\1\0\1\1\2\0\1\1"+
-    "\6\0\4\1\1\0\7\1\1\0\3\1\1\0\1\1\1\0\1\1"+
-    "\2\0\2\1\1\0\4\1\1\2\2\1\6\2\1\0\2\2\1\1"+
-    "\2\0\5\1\1\0\1\1\1\0\6\2\2\0\12\111\2\0\4\1"+
-    "\40\0\1\1\27\0\2\2\6\0\12\111\13\0\1\2\1\0\1\2"+
-    "\1\0\1\2\4\0\2\2\10\1\1\0\44\1\4\0\24\2\1\0"+
-    "\2\2\5\1\13\2\1\0\44\2\11\0\1\2\71\0\53\1\24\2"+
-    "\1\1\12\111\6\0\6\1\4\2\4\1\3\2\1\1\3\2\2\1"+
-    "\7\2\3\1\4\2\15\1\14\2\1\1\1\2\12\111\4\2\2\0"+
-    "\46\1\1\0\1\1\5\0\1\1\2\0\53\1\1\0\u014d\1\1\0"+
-    "\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0"+
-    "\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0"+
-    "\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0"+
-    "\3\2\11\0\11\2\16\0\20\1\20\0\125\1\14\0\u026c\1\2\0"+
-    "\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+
-    "\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+
-    "\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+
-    "\1\1\1\2\2\0\12\111\41\0\3\2\2\0\12\111\6\0\130\1"+
-    "\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+
-    "\4\0\14\2\12\0\12\111\36\1\2\0\5\1\13\0\54\1\4\0"+
-    "\21\2\7\1\2\2\6\0\12\111\1\2\45\0\27\1\5\2\4\0"+
-    "\65\1\12\2\1\0\35\2\2\0\1\2\12\111\6\0\12\111\15\0"+
-    "\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\111\21\0\11\2"+
-    "\14\0\3\2\36\1\15\2\2\1\12\111\54\1\16\2\14\0\44\1"+
-    "\24\2\10\0\12\111\3\0\3\1\12\111\44\1\122\0\3\2\1\0"+
-    "\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2\25\0"+
-    "\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1"+
-    "\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1"+
-    "\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1"+
-    "\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41"+
-    "\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41"+
-    "\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2"+
-    "\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1"+
-    "\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1"+
-    "\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1"+
-    "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1"+
-    "\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2"+
-    "\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0"+
-    "\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1"+
-    "\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0"+
-    "\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0"+
-    "\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\111"+
-    "\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0\1\2"+
-    "\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
-    "\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2\27\1"+
-    "\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\111\6\0"+
-    "\22\2\6\1\3\0\1\1\4\0\12\111\34\1\10\2\2\0\27\1"+
-    "\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\111"+
-    "\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\111"+
-    "\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1\3\2"+
-    "\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1\2\0"+
-    "\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1\2\0"+
-    "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+
-    "\2\0\12\111\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u016e\1"+
-    "\2\0\152\1\46\0\7\1\14\0\5\1\5\0\1\1\1\2\12\1"+
-    "\1\0\15\1\1\0\5\1\1\0\1\1\1\0\2\1\1\0\2\1"+
-    "\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1"+
-    "\4\0\20\2\20\0\7\2\14\0\2\2\30\0\3\2\40\0\5\1"+
-    "\1\0\207\1\23\0\12\111\7\0\32\1\4\0\1\2\1\0\32\1"+
-    "\13\0\131\1\3\0\6\1\2\0\6\1\2\0\6\1\2\0\3\1"+
-    "\43\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1\1\0\17\1"+
-    "\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\2\202\0\35\1"+
-    "\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1\2\0\44\1"+
-    "\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\111\u0356\0\6\1"+
-    "\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1\2\0\27\1"+
-    "\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1\100\0\1\1"+
-    "\3\2\1\0\2\2\5\0\4\2\4\1\1\0\3\1\1\0\33\1"+
-    "\4\0\3\2\4\0\1\2\40\0\35\1\203\0\66\1\12\0\26\1"+
-    "\12\0\23\1\215\0\111\1\u03b7\0\3\2\65\1\17\2\37\0\12\111"+
-    "\20\0\3\2\55\1\13\2\25\0\31\1\7\0\12\111\6\0\3\2"+
-    "\44\1\16\2\1\0\12\111\100\0\3\2\60\1\16\2\4\1\13\0"+
-    "\12\111\u04a6\0\53\1\15\2\10\0\12\111\u0936\0\u036f\1\221\0\143\1"+
-    "\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1\13\0\1\1\56\2\20\0"+
-    "\4\2\15\1\u4060\0\2\1\u2163\0\5\2\3\0\6\2\10\0\10\2"+
-    "\2\0\7\2\36\0\4\2\224\0\3\2\u01bb\0\125\1\1\0\107\1"+
-    "\1\0\2\1\2\0\1\1\2\0\2\1\2\0\4\1\1\0\14\1"+
-    "\1\0\1\1\1\0\7\1\1\0\101\1\1\0\4\1\2\0\10\1"+
-    "\1\0\7\1\1\0\34\1\1\0\4\1\1\0\5\1\1\0\1\1"+
-    "\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0\31\1\1\0\37\1"+
-    "\1\0\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1"+
-    "\1\0\37\1\1\0\31\1\1\0\10\1\2\0\62\111\u1600\0\4\1"+
+    "\12\111\1\0\2\1\16\0\3\2\1\0\10\1\1\0\3\1\1\0"+
+    "\51\1\2\0\1\1\7\2\1\0\3\2\1\0\4\2\1\1\5\0"+
+    "\3\1\1\2\7\0\3\1\2\2\2\0\12\111\12\0\6\1\2\0"+
+    "\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0"+
+    "\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\6\0"+
+    "\12\111\2\0\2\2\15\0\60\1\1\2\2\1\7\2\5\0\7\1"+
+    "\10\2\1\0\12\111\47\0\2\1\1\0\1\1\2\0\2\1\1\0"+
+    "\1\1\2\0\1\1\6\0\4\1\1\0\7\1\1\0\3\1\1\0"+
+    "\1\1\1\0\1\1\2\0\2\1\1\0\4\1\1\2\2\1\6\2"+
+    "\1\0\2\2\1\1\2\0\5\1\1\0\1\1\1\0\6\2\2\0"+
+    "\12\111\2\0\4\1\40\0\1\1\27\0\2\2\6\0\12\111\13\0"+
+    "\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1"+
+    "\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0\1\2"+
+    "\71\0\53\1\24\2\1\1\12\111\6\0\6\1\4\2\4\1\3\2"+
+    "\1\1\3\2\2\1\7\2\3\1\4\2\15\1\14\2\1\1\1\2"+
+    "\12\111\4\2\2\0\46\1\1\0\1\1\5\0\1\1\2\0\53\1"+
+    "\1\0\u014d\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1"+
+    "\2\0\51\1\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1"+
+    "\1\0\1\1\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1"+
+    "\2\0\103\1\2\0\3\2\11\0\11\2\16\0\20\1\20\0\126\1"+
+    "\2\0\6\1\3\0\u026c\1\2\0\21\1\1\41\32\1\5\0\113\1"+
+    "\3\0\13\1\7\0\15\1\1\0\4\1\3\2\13\0\22\1\3\2"+
+    "\13\0\22\1\2\2\14\0\15\1\1\0\3\1\1\0\2\2\14\0"+
+    "\64\1\40\2\3\0\1\1\4\0\1\1\1\2\2\0\12\111\41\0"+
+    "\3\2\2\0\12\111\6\0\130\1\10\0\51\1\1\2\1\1\5\0"+
+    "\106\1\12\0\37\1\1\0\14\2\4\0\14\2\12\0\12\111\36\1"+
+    "\2\0\5\1\13\0\54\1\4\0\32\1\6\0\12\111\1\2\45\0"+
+    "\27\1\5\2\4\0\65\1\12\2\1\0\35\2\2\0\1\2\12\111"+
+    "\6\0\12\111\15\0\1\1\10\0\16\2\102\0\5\2\57\1\21\2"+
+    "\7\1\4\0\12\111\21\0\11\2\14\0\3\2\36\1\15\2\2\1"+
+    "\12\111\54\1\16\2\14\0\44\1\24\2\10\0\12\111\3\0\3\1"+
+    "\12\111\44\1\2\0\11\1\107\0\3\2\1\0\25\2\4\1\1\2"+
+    "\4\1\3\2\2\1\1\0\2\2\6\0\300\1\66\2\5\0\5\2"+
+    "\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0"+
+    "\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0"+
+    "\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0"+
+    "\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41\35\0"+
+    "\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41\21\0"+
+    "\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2\3\0"+
+    "\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1\2\0"+
+    "\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1\2\0"+
+    "\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1\1\0"+
+    "\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1\1\0"+
+    "\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2\27\1"+
+    "\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
+    "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0\1\41"+
+    "\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1\4\0"+
+    "\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0\51\1"+
+    "\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0\u51d6\1"+
+    "\52\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\111\2\1"+
+    "\24\0\57\1\1\2\4\0\12\2\1\0\37\1\2\2\120\1\2\2"+
+    "\45\0\11\1\2\0\147\1\2\0\44\1\1\0\10\1\77\0\13\1"+
+    "\1\2\3\1\1\2\4\1\1\2\27\1\5\2\30\0\64\1\14\0"+
+    "\2\2\62\1\22\2\12\0\12\111\6\0\22\2\6\1\3\0\1\1"+
+    "\1\0\1\1\2\0\12\111\34\1\10\2\2\0\27\1\15\2\14\0"+
+    "\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\111\6\0\5\1"+
+    "\1\2\12\1\12\111\5\1\1\0\51\1\16\2\11\0\3\1\1\2"+
+    "\10\1\2\2\2\0\12\111\6\0\27\1\3\0\1\1\3\2\62\1"+
+    "\1\2\1\1\3\2\2\1\2\2\5\1\2\2\1\1\1\2\1\1"+
+    "\30\0\3\1\2\0\13\1\5\2\2\0\3\1\2\2\12\0\6\1"+
+    "\2\0\6\1\2\0\6\1\11\0\7\1\1\0\7\1\1\0\53\1"+
+    "\1\0\12\1\12\0\163\1\10\2\1\0\2\2\2\0\12\111\6\0"+
+    "\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u016e\1\2\0\152\1\46\0"+
+    "\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1\1\0"+
+    "\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1\41\0"+
+    "\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2\20\0"+
+    "\20\2\3\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1\23\0"+
+    "\12\111\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1\3\0"+
+    "\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0\14\1\1\0"+
+    "\32\1\1\0\23\1\1\0\2\1\1\0\17\1\2\0\16\1\42\0"+
+    "\173\1\105\0\65\1\210\0\1\2\202\0\35\1\3\0\61\1\17\0"+
+    "\1\2\37\0\40\1\20\0\33\1\5\0\46\1\5\2\5\0\36\1"+
+    "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\111"+
+    "\6\0\44\1\4\0\44\1\4\0\50\1\10\0\64\1\234\0\u0137\1"+
+    "\11\0\26\1\12\0\10\1\230\0\6\1\2\0\1\1\1\0\54\1"+
+    "\1\0\2\1\3\0\1\1\2\0\27\1\12\0\27\1\11\0\37\1"+
+    "\101\0\23\1\1\0\2\1\12\0\26\1\12\0\32\1\106\0\70\1"+
+    "\6\0\2\1\100\0\1\1\3\2\1\0\2\2\5\0\4\2\4\1"+
+    "\1\0\3\1\1\0\33\1\4\0\3\2\4\0\1\2\40\0\35\1"+
+    "\3\0\35\1\43\0\10\1\1\0\34\1\2\2\31\0\66\1\12\0"+
+    "\26\1\12\0\23\1\15\0\22\1\156\0\111\1\67\0\63\1\15\0"+
+    "\63\1\u030d\0\3\2\65\1\17\2\37\0\12\111\17\0\4\2\55\1"+
+    "\13\2\25\0\31\1\7\0\12\111\6\0\3\2\44\1\16\2\1\0"+
+    "\12\111\20\0\43\1\1\2\2\0\1\1\11\0\3\2\60\1\16\2"+
+    "\4\1\5\0\3\2\3\0\12\111\1\1\1\0\1\1\43\0\22\1"+
+    "\1\0\31\1\14\2\6\0\1\2\101\0\7\1\1\0\1\1\1\0"+
+    "\4\1\1\0\17\1\1\0\12\1\7\0\57\1\14\2\5\0\12\111"+
+    "\6\0\4\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
+    "\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2"+
+    "\2\0\3\2\2\0\1\1\6\0\1\2\5\0\5\1\2\2\2\0"+
+    "\7\2\3\0\5\2\213\0\65\1\22\2\4\1\5\0\12\111\46\0"+
+    "\60\1\24\2\2\1\1\0\1\1\10\0\12\111\246\0\57\1\7\2"+
+    "\2\0\11\2\27\0\4\1\2\2\42\0\60\1\21\2\3\0\1\1"+
+    "\13\0\12\111\46\0\53\1\15\2\10\0\12\111\66\0\32\1\3\0"+
+    "\17\2\4\0\12\111\u0166\0\100\1\12\111\25\0\1\1\u01c0\0\71\1"+
+    "\u0107\0\11\1\1\0\45\1\10\2\1\0\10\2\1\1\17\0\12\111"+
+    "\30\0\36\1\2\0\26\2\1\0\16\2\u0349\0\u039a\1\146\0\157\1"+
+    "\21\0\304\1\u0abc\0\u042f\1\u0fd1\0\u0247\1\u21b9\0\u0239\1\7\0\37\1"+
+    "\1\0\12\111\146\0\36\1\2\0\5\2\13\0\60\1\7\2\11\0"+
+    "\4\1\14\0\12\111\11\0\25\1\5\0\23\1\u0370\0\105\1\13\0"+
+    "\1\1\56\2\20\0\4\2\15\1\100\0\1\1\37\0\u17ed\1\23\0"+
+    "\u02f3\1\u250d\0\2\1\u0bfe\0\153\1\5\0\15\1\3\0\11\1\7\0"+
+    "\12\1\3\0\2\2\u14c6\0\5\2\3\0\6\2\10\0\10\2\2\0"+
+    "\7\2\36\0\4\2\224\0\3\2\u01bb\0\125\1\1\0\107\1\1\0"+
+    "\2\1\2\0\1\1\2\0\2\1\2\0\4\1\1\0\14\1\1\0"+
+    "\1\1\1\0\7\1\1\0\101\1\1\0\4\1\2\0\10\1\1\0"+
+    "\7\1\1\0\34\1\1\0\4\1\1\0\5\1\1\0\1\1\3\0"+
+    "\7\1\1\0\u0154\1\2\0\31\1\1\0\31\1\1\0\37\1\1\0"+
+    "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
+    "\37\1\1\0\31\1\1\0\10\1\2\0\62\111\u0200\0\67\2\4\0"+
+    "\62\2\10\0\1\2\16\0\1\2\26\0\5\2\1\0\17\2\u0550\0"+
+    "\7\2\1\0\21\2\2\0\7\2\1\0\2\2\1\0\5\2\u07d5\0"+
+    "\305\1\13\0\7\2\51\0\104\1\7\2\5\0\12\111\u04a6\0\4\1"+
     "\1\0\33\1\1\0\2\1\1\0\1\1\2\0\1\1\1\0\12\1"+
     "\1\0\4\1\1\0\1\1\1\0\1\1\6\0\1\1\4\0\1\1"+
     "\1\0\1\1\1\0\1\1\1\0\3\1\1\0\2\1\1\0\1\1"+
@@ -223,7 +248,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     "\1\0\2\1\1\0\1\1\2\0\4\1\1\0\7\1\1\0\4\1"+
     "\1\0\4\1\1\0\1\1\1\0\12\1\1\0\21\1\5\0\3\1"+
     "\1\0\5\1\1\0\21\1\u1144\0\ua6d7\1\51\0\u1035\1\13\0\336\1"+
-    "\u3fe2\0\u021e\1\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u06ed\0"+
+    "\2\0\u1682\1\u295e\0\u021e\1\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u06ed\0"+
     "\360\2\uffff\0\uffff\0\ufe12\0";
 
   /** 
@@ -29654,7 +29679,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -29809,11 +29834,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -29950,24 +29975,14 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   private TextSegment entitySegment = new TextSegment(2);
 
   /**
-   * Creates a new HTMLStripCharFilter over the provided Reader.
-   * @param source Reader to strip html tags from.
-   */
-  public HTMLStripCharFilter(Reader source) {
-    super(source);
-    this.zzReader = source;
-  }
-
-  /**
    * Creates a new HTMLStripCharFilter over the provided Reader
    * with the specified start and end tags.
-   * @param source Reader to strip html tags from.
+   * @param in Reader to strip html tags from.
    * @param escapedTags Tags in this set (both start and end tags)
    *  will not be filtered out.
    */
-  public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
-    super(source);
-    this.zzReader = source;
+  public HTMLStripCharFilter(Reader in, Set<String> escapedTags) {
+    this(in);
     if (null != escapedTags) {
       for (String tag : escapedTags) {
         if (tag.equalsIgnoreCase("BR")) {
@@ -30059,7 +30074,15 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   }
 
 
-  
+  /**
+   * Creates a new scanner
+   *
+   * @param   in  the java.io.Reader to read input from.
+   */
+  public HTMLStripCharFilter(java.io.Reader in) {
+    super(in);
+    this.zzReader = in;
+  }
 
 
   /** 
@@ -30072,7 +30095,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     char [] map = new char[0x110000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2836) {
+    while (i < 3340) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -30116,28 +30139,29 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -30420,43 +30444,55 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { if (yylength() == 1) {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+            zzDoEOF();
+          {   return eofReturnValue;
+ }
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { if (yylength() == 1) {
     return zzBuffer[zzStartRead];
   } else {
     outputSegment.append(yytext()); return outputSegment.nextChar();
   }
-          }
-        case 55: break;
-        case 2: 
-          { inputStart = yychar;
+            } 
+            // fall through
+          case 55: break;
+          case 2: 
+            { inputStart = yychar;
   inputSegment.clear();
   inputSegment.append('<');
   yybegin(LEFT_ANGLE_BRACKET);
-          }
-        case 56: break;
-        case 3: 
-          { inputStart = yychar;
+            } 
+            // fall through
+          case 56: break;
+          case 3: 
+            { inputStart = yychar;
   inputSegment.clear();
   inputSegment.append('&');
   yybegin(AMPERSAND);
-          }
-        case 57: break;
-        case 4: 
-          { yypushback(yylength());
+            } 
+            // fall through
+          case 57: break;
+          case 4: 
+            { yypushback(yylength());
     outputSegment = inputSegment;
     outputSegment.restart();
     yybegin(YYINITIAL);
     return outputSegment.nextChar();
-          }
-        case 58: break;
-        case 5: 
-          { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
-          }
-        case 59: break;
-        case 6: 
-          { int matchLength = yylength();
+            } 
+            // fall through
+          case 58: break;
+          case 5: 
+            { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
+            } 
+            // fall through
+          case 59: break;
+          case 6: 
+            { int matchLength = yylength();
     inputSegment.write(zzBuffer, zzStartRead, matchLength);
     if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
       String decimalCharRef = yytext();
@@ -30487,180 +30523,206 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       yybegin(YYINITIAL);
       return outputSegment.nextChar();
     }
-          }
-        case 60: break;
-        case 7: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 60: break;
+          case 7: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
     yybegin(YYINITIAL);
     return outputSegment.nextChar();
-          }
-        case 61: break;
-        case 8: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 61: break;
+          case 8: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(START_TAG_TAIL_INCLUDE);
     } else {
       yybegin(START_TAG_TAIL_SUBSTITUTE);
     }
-          }
-        case 62: break;
-        case 9: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 62: break;
+          case 9: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(START_TAG_TAIL_INCLUDE);
     } else {
       yybegin(START_TAG_TAIL_EXCLUDE);
     }
-          }
-        case 63: break;
-        case 10: 
-          { inputSegment.append('!'); yybegin(BANG);
-          }
-        case 64: break;
-        case 11: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 63: break;
+          case 10: 
+            { inputSegment.append('!'); yybegin(BANG);
+            } 
+            // fall through
+          case 64: break;
+          case 11: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     yybegin(LEFT_ANGLE_BRACKET_SPACE);
-          }
-        case 65: break;
-        case 12: 
-          { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
-          }
-        case 66: break;
-        case 13: 
-          { inputSegment.append(yytext());
-          }
-        case 67: break;
-        case 14: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 65: break;
+          case 12: 
+            { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+            } 
+            // fall through
+          case 66: break;
+          case 13: 
+            { inputSegment.append(yytext());
+            } 
+            // fall through
+          case 67: break;
+          case 14: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += inputSegment.length() + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
-          }
-        case 68: break;
-        case 15: 
-          { 
-          }
-        case 69: break;
-        case 16: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 70: break;
-        case 17: 
-          { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 71: break;
-        case 18: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 68: break;
+          case 15: 
+            { 
+            } 
+            // fall through
+          case 69: break;
+          case 16: 
+            { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 70: break;
+          case 17: 
+            { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 71: break;
+          case 18: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(END_TAG_TAIL_INCLUDE);
     } else {
       yybegin(END_TAG_TAIL_SUBSTITUTE);
     }
-          }
-        case 72: break;
-        case 19: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 72: break;
+          case 19: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(END_TAG_TAIL_INCLUDE);
     } else {
       yybegin(END_TAG_TAIL_EXCLUDE);
     }
-          }
-        case 73: break;
-        case 20: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
-          }
-        case 74: break;
-        case 21: 
-          { if (yylength() == 1) {
+            } 
+            // fall through
+          case 73: break;
+          case 20: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 74: break;
+          case 21: 
+            { if (yylength() == 1) {
       return zzBuffer[zzStartRead];
     } else {
       outputSegment.append(yytext()); return outputSegment.nextChar();
     }
-          }
-        case 75: break;
-        case 22: 
-          { previousRestoreState = restoreState;
+            } 
+            // fall through
+          case 75: break;
+          case 22: 
+            { previousRestoreState = restoreState;
     restoreState = SERVER_SIDE_INCLUDE;
     yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 76: break;
-        case 23: 
-          { previousRestoreState = restoreState;
+            } 
+            // fall through
+          case 76: break;
+          case 23: 
+            { previousRestoreState = restoreState;
     restoreState = SERVER_SIDE_INCLUDE;
     yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 77: break;
-        case 24: 
-          { yybegin(restoreState); restoreState = previousRestoreState;
-          }
-        case 78: break;
-        case 25: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 77: break;
+          case 24: 
+            { yybegin(restoreState); restoreState = previousRestoreState;
+            } 
+            // fall through
+          case 78: break;
+          case 25: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
      outputSegment = inputSegment;
      yybegin(YYINITIAL);
      return outputSegment.nextChar();
-          }
-        case 79: break;
-        case 26: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 79: break;
+          case 26: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - 1;
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
     return BLOCK_LEVEL_END_TAG_REPLACEMENT;
-          }
-        case 80: break;
-        case 27: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 80: break;
+          case 27: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += inputSegment.length() + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     outputSegment = inputSegment;
     yybegin(YYINITIAL);
-          }
-        case 81: break;
-        case 28: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 81: break;
+          case 28: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - 1;
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
     return BLOCK_LEVEL_START_TAG_REPLACEMENT;
-          }
-        case 82: break;
-        case 29: 
-          { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 83: break;
-        case 30: 
-          { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 84: break;
-        case 31: 
-          { int length = yylength();
+            } 
+            // fall through
+          case 82: break;
+          case 29: 
+            { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 83: break;
+          case 30: 
+            { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 84: break;
+          case 31: 
+            { int length = yylength();
     inputSegment.write(zzBuffer, zzStartRead, length);
     entitySegment.clear();
     char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
     entitySegment.append(ch);
     outputSegment = entitySegment;
     yybegin(CHARACTER_REFERENCE_TAIL);
-          }
-        case 85: break;
-        case 32: 
-          { int matchLength = yylength();
+            } 
+            // fall through
+          case 85: break;
+          case 32: 
+            { int matchLength = yylength();
     inputSegment.write(zzBuffer, zzStartRead, matchLength);
     if (matchLength <= 6) { // 10FFFF: max 6 hex chars
       String hexCharRef
@@ -30692,18 +30754,20 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       yybegin(YYINITIAL);
       return outputSegment.nextChar();
     }
-          }
-        case 86: break;
-        case 33: 
-          { if (inputSegment.length() > 2) { // Chars between "<!" and "--" - this is not a comment
+            } 
+            // fall through
+          case 86: break;
+          case 33: 
+            { if (inputSegment.length() > 2) { // Chars between "<!" and "--" - this is not a comment
       inputSegment.append(yytext());
     } else {
       yybegin(COMMENT);
     }
-          }
-        case 87: break;
-        case 34: 
-          { yybegin(YYINITIAL);
+            } 
+            // fall through
+          case 87: break;
+          case 34: 
+            { yybegin(YYINITIAL);
     if (escapeBR) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
@@ -30716,23 +30780,26 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.reset();
       return BR_START_TAG_REPLACEMENT;
     }
-          }
-        case 88: break;
-        case 35: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
+            } 
+            // fall through
+          case 88: break;
+          case 35: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
     cumulativeDiff += yychar - inputStart + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
-          }
-        case 89: break;
-        case 36: 
-          { yybegin(SCRIPT);
-          }
-        case 90: break;
-        case 37: 
-          { yybegin(YYINITIAL);
+            } 
+            // fall through
+          case 89: break;
+          case 36: 
+            { yybegin(SCRIPT);
+            } 
+            // fall through
+          case 90: break;
+          case 37: 
+            { yybegin(YYINITIAL);
     if (escapeBR) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
@@ -30745,66 +30812,77 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.reset();
       return BR_END_TAG_REPLACEMENT;
     }
-          }
-        case 91: break;
-        case 38: 
-          { // add (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 91: break;
+          case 38: 
+            { // add (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     yybegin(YYINITIAL);
-          }
-        case 92: break;
-        case 39: 
-          { yybegin(restoreState);
-          }
-        case 93: break;
-        case 40: 
-          { yybegin(STYLE);
-          }
-        case 94: break;
-        case 41: 
-          { yybegin(SCRIPT_COMMENT);
-          }
-        case 95: break;
-        case 42: 
-          { yybegin(STYLE_COMMENT);
-          }
-        case 96: break;
-        case 43: 
-          { restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 97: break;
-        case 44: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 98: break;
-        case 45: 
-          { restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 99: break;
-        case 46: 
-          { yybegin(STYLE);
+            } 
+            // fall through
+          case 92: break;
+          case 39: 
+            { yybegin(restoreState);
+            } 
+            // fall through
+          case 93: break;
+          case 40: 
+            { yybegin(STYLE);
+            } 
+            // fall through
+          case 94: break;
+          case 41: 
+            { yybegin(SCRIPT_COMMENT);
+            } 
+            // fall through
+          case 95: break;
+          case 42: 
+            { yybegin(STYLE_COMMENT);
+            } 
+            // fall through
+          case 96: break;
+          case 43: 
+            { restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 97: break;
+          case 44: 
+            { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 98: break;
+          case 45: 
+            { restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 99: break;
+          case 46: 
+            { yybegin(STYLE);
     if (escapeSTYLE) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
       inputStart += 1 + yylength();
       return outputSegment.nextChar();
     }
-          }
-        case 100: break;
-        case 47: 
-          { yybegin(SCRIPT);
+            } 
+            // fall through
+          case 100: break;
+          case 47: 
+            { yybegin(SCRIPT);
     if (escapeSCRIPT) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
       inputStart += 1 + yylength();
       return outputSegment.nextChar();
     }
-          }
-        case 101: break;
-        case 48: 
-          { if (inputSegment.length() > 2) { // Chars between "<!" and "[CDATA[" - this is not a CDATA section
+            } 
+            // fall through
+          case 101: break;
+          case 48: 
+            { if (inputSegment.length() > 2) { // Chars between "<!" and "[CDATA[" - this is not a CDATA section
       inputSegment.append(yytext());
     } else {
       // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
@@ -30814,10 +30892,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.clear();
       yybegin(CDATA);
     }
-          }
-        case 102: break;
-        case 49: 
-          { inputSegment.clear();
+            } 
+            // fall through
+          case 102: break;
+          case 49: 
+            { inputSegment.clear();
     yybegin(YYINITIAL);
     // add (previously matched input length) -- current match and substitution handled below
     cumulativeDiff += yychar - inputStart;
@@ -30837,10 +30916,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
     addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
     return returnValue;
-          }
-        case 103: break;
-        case 50: 
-          { inputSegment.clear();
+            } 
+            // fall through
+          case 103: break;
+          case 50: 
+            { inputSegment.clear();
     yybegin(YYINITIAL);
     // add (previously matched input length) -- current match and substitution handled below
     cumulativeDiff += yychar - inputStart;
@@ -30860,10 +30940,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
     addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
     return returnValue;
-          }
-        case 104: break;
-        case 51: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 104: break;
+          case 51: 
+            { // Handle paired UTF-16 surrogates.
     outputSegment = entitySegment;
     outputSegment.clear();
     String surrogatePair = yytext();
@@ -30888,10 +30969,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     inputSegment.clear();
     yybegin(YYINITIAL);
     return highSurrogate;
-          }
-        case 105: break;
-        case 52: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 105: break;
+          case 52: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     char lowSurrogate = '\u0000';
@@ -30922,10 +31004,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 106: break;
-        case 53: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 106: break;
+          case 53: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     try { // High surrogates are in decimal range [55296, 56319]
@@ -30955,10 +31038,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 107: break;
-        case 54: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 107: break;
+          case 54: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     try { // High surrogates are in decimal range [55296, 56319]
@@ -30991,18 +31075,12 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 108: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            zzDoEOF();
-              {   return eofReturnValue;
- }
-          } 
-          else {
+            } 
+            // fall through
+          case 108: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
index d810d79..8b83de0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %apiprivate
 %type int
 %final
@@ -50,6 +50,10 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 %xstate START_TAG_TAIL_INCLUDE, START_TAG_TAIL_EXCLUDE, START_TAG_TAIL_SUBSTITUTE
 %xstate STYLE, STYLE_COMMENT
 
+%init{
+  super(in);
+%init}
+
 // From XML 1.0 <http://www.w3.org/TR/xml/>:
 //
 //    [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [...]
@@ -166,24 +170,14 @@ InlineElment = ( [aAbBiIqQsSuU]                   |
   private TextSegment entitySegment = new TextSegment(2);
 
   /**
-   * Creates a new HTMLStripCharFilter over the provided Reader.
-   * @param source Reader to strip html tags from.
-   */
-  public HTMLStripCharFilter(Reader source) {
-    super(source);
-    this.zzReader = source;
-  }
-
-  /**
    * Creates a new HTMLStripCharFilter over the provided Reader
    * with the specified start and end tags.
-   * @param source Reader to strip html tags from.
+   * @param in Reader to strip html tags from.
    * @param escapedTags Tags in this set (both start and end tags)
    *  will not be filtered out.
    */
-  public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
-    super(source);
-    this.zzReader = source;
+  public HTMLStripCharFilter(Reader in, Set<String> escapedTags) {
+    this(in);
     if (null != escapedTags) {
       for (String tag : escapedTags) {
         if (tag.equalsIgnoreCase("BR")) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
index 7e5105d..31d3d96 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -251,7 +251,7 @@ class ClassicTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -323,11 +323,11 @@ class ClassicTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -436,28 +436,29 @@ public final void getText(CharTermAttribute t) {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -681,55 +682,65 @@ public final void getText(CharTermAttribute t) {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
-          }
-        case 11: break;
-        case 2: 
-          { return ALPHANUM;
-          }
-        case 12: break;
-        case 3: 
-          { return CJ;
-          }
-        case 13: break;
-        case 4: 
-          { return HOST;
-          }
-        case 14: break;
-        case 5: 
-          { return NUM;
-          }
-        case 15: break;
-        case 6: 
-          { return APOSTROPHE;
-          }
-        case 16: break;
-        case 7: 
-          { return COMPANY;
-          }
-        case 17: break;
-        case 8: 
-          { return ACRONYM_DEP;
-          }
-        case 18: break;
-        case 9: 
-          { return ACRONYM;
-          }
-        case 19: break;
-        case 10: 
-          { return EMAIL;
-          }
-        case 20: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            return YYEOF;
-          } 
-          else {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+        return YYEOF;
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+            } 
+            // fall through
+          case 11: break;
+          case 2: 
+            { return ALPHANUM;
+            } 
+            // fall through
+          case 12: break;
+          case 3: 
+            { return CJ;
+            } 
+            // fall through
+          case 13: break;
+          case 4: 
+            { return HOST;
+            } 
+            // fall through
+          case 14: break;
+          case 5: 
+            { return NUM;
+            } 
+            // fall through
+          case 15: break;
+          case 6: 
+            { return APOSTROPHE;
+            } 
+            // fall through
+          case 16: break;
+          case 7: 
+            { return COMPANY;
+            } 
+            // fall through
+          case 17: break;
+          case 8: 
+            { return ACRONYM_DEP;
+            } 
+            // fall through
+          case 18: break;
+          case 9: 
+            { return ACRONYM;
+            } 
+            // fall through
+          case 19: break;
+          case 10: 
+            { return EMAIL;
+            } 
+            // fall through
+          case 20: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
index 842ae51..65848f2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
@@ -32,33 +32,32 @@ import org.apache.lucene.util.AttributeFactory;
  * algorithm, as specified in 
  * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a> 
  * URLs and email addresses are also tokenized according to the relevant RFCs.
- * <p>
- * Tokens produced are of the following types:
- * <ul>
- *   <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
- *   <li>&lt;NUM&gt;: A number</li>
- *   <li>&lt;URL&gt;: A URL</li>
- *   <li>&lt;EMAIL&gt;: An email address</li>
- *   <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
- *       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- *   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
- *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
- * </ul>
  */
 
 public final class UAX29URLEmailTokenizer extends Tokenizer {
   /** A private instance of the JFlex-constructed scanner */
   private final UAX29URLEmailTokenizerImpl scanner;
-  
-  public static final int ALPHANUM          = 0;
-  public static final int NUM               = 1;
-  public static final int SOUTHEAST_ASIAN   = 2;
-  public static final int IDEOGRAPHIC       = 3;
-  public static final int HIRAGANA          = 4;
-  public static final int KATAKANA          = 5;
-  public static final int HANGUL            = 6;
-  public static final int URL               = 7;
-  public static final int EMAIL             = 8;
+
+  /** Alpha/numeric token type */
+  public static final int ALPHANUM = 0;
+  /** Numeric token type */
+  public static final int NUM = 1;
+  /** Southeast Asian token type */
+  public static final int SOUTHEAST_ASIAN = 2;
+  /** Ideographic token type */
+  public static final int IDEOGRAPHIC = 3;
+  /** Hiragana token type */
+  public static final int HIRAGANA = 4;
+  /** Katakana token type */
+  public static final int KATAKANA = 5;
+  /** Hangul token type */
+  public static final int HANGUL = 6;
+  /** URL token type */
+  public static final int URL = 7;
+  /** Email token type */
+  public static final int EMAIL = 8;
+  /** Emoji token type. */
+  public static final int EMOJI = 9;
 
   /** String token types that correspond to token type int constants */
   public static final String [] TOKEN_TYPES = new String [] {
@@ -71,6 +70,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
     StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL],
     "<URL>",
     "<EMAIL>",
+    StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI]
   };
 
   /** Absolute maximum sized token */


[24/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e8c65da6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e8c65da6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e8c65da6

Branch: refs/heads/branch_7x
Commit: e8c65da6bb8be626242cfba18989e497180e82aa
Parents: 612a1d0
Author: Steve Rowe <sa...@apache.org>
Authored: Tue Jan 8 13:33:49 2019 -0500
Committer: Steve Rowe <sa...@apache.org>
Committed: Tue Jan 8 13:34:37 2019 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |     8 +
 lucene/analysis/common/build.xml                |    32 +-
 .../charfilter/HTMLStripCharFilter.java         |   834 +-
 .../charfilter/HTMLStripCharFilter.jflex        |    22 +-
 .../analysis/standard/ClassicTokenizerImpl.java |   141 +-
 .../standard/UAX29URLEmailTokenizer.java        |    44 +-
 .../standard/UAX29URLEmailTokenizerImpl.java    | 74906 +++++++++--------
 .../standard/UAX29URLEmailTokenizerImpl.jflex   |   216 +-
 .../wikipedia/WikipediaTokenizerImpl.java       |   465 +-
 .../charfilter/HTMLStripCharFilterTest.java     |     2 +-
 .../standard/TestUAX29URLEmailAnalyzer.java     |     4 +-
 .../standard/TestUAX29URLEmailTokenizer.java    |    76 +-
 lucene/common-build.xml                         |    21 +-
 .../src/data/jflex/UnicodeEmojiProperties.jflex |    25 +
 .../src/data/jflex/getUnicodeEmojiProperties.pl |   168 +
 lucene/core/src/data/jflex/skeleton.default     |   342 +
 .../jflex/skeleton.disable.buffer.expansion.txt |   348 +
 .../standard/StandardTokenizerImpl.java         |   637 +-
 .../standard/StandardTokenizerImpl.jflex        |   206 +-
 .../analysis/standard/TestStandardAnalyzer.java |   131 +-
 .../EmojiTokenizationTestUnicode_11_0.java      | 10756 +++
 .../standard/WordBreakTestUnicode_6_3_0.java    |  5537 --
 .../standard/WordBreakTestUnicode_9_0_0.java    |  8276 ++
 .../standard/generateEmojiTokenizationTest.pl   |   150 +
 .../generateJavaUnicodeWordBreakTest.pl         |    41 +-
 25 files changed, 62395 insertions(+), 40993 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 52c3939..f18e76b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -5,6 +5,11 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 7.7.0 =======================
 
+Changes in Runtime Behavior
+
+* LUCENE-8527: StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0,
+  and provide Unicode UTS#51 v11.0 Emoji tokenization with the "<EMOJI>" token type. 
+
 Build
 
 * LUCENE-8611: Update randomizedtesting to 2.7.2, JUnit to 4.12, add hamcrest-core 
@@ -57,6 +62,9 @@ Improvements
 
 * LUCENE-8581: Change LatLonShape encoding to use 4 bytes Per Dimension.
   (Ignacio Vera, Nick Knize, Adrien Grand)
+  
+* LUCENE-8527: Upgrade JFlex dependency to 1.7.0; in StandardTokenizer and UAX29URLEmailTokenizer,
+  increase supported Unicode version from 6.3 to 9.0, and support Unicode UTS#51 v11.0 Emoji tokenization.
 
 Optimizations
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/build.xml b/lucene/analysis/common/build.xml
index b8eb37a..f752ecc 100644
--- a/lucene/analysis/common/build.xml
+++ b/lucene/analysis/common/build.xml
@@ -33,18 +33,14 @@
 
   <property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
 
-  <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-ClassicAnalyzer,-jflex-UAX29URLEmailTokenizer,
-                                -jflex-wiki-tokenizer,-jflex-HTMLStripCharFilter"/>
-
-  <target name="-jflex-HTMLStripCharFilter"
-          depends="init,generate-jflex-html-char-entities">
-    <jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex"
-           outdir="src/java/org/apache/lucene/analysis/charfilter"
-           nobak="on" inputstreamctor="false"/>
-    <!-- Remove the inappropriate JFlex-generated constructor -->
-    <replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
-                   match="/\*\*\s*\*\s*Creates a new scanner\s*\*\s*\*\s*@param\s*in\s*the java.io.Reader to read input from\.\s*\*/\s*public HTMLStripCharFilter\(java\.io\.Reader in\)\s*\{\s*this.zzReader = in;\s*\}"
-                   replace="" flags="s"/>
+  <!-- Because of a bug in JFlex's ant task, HTMLStripCharFilter has to be generated last.   -->
+  <!-- Otherwise the "%apiprivate" option used in its specification will leak into following -->
+  <!-- ant task invocations.                                                                 -->
+  <target name="jflex" depends="init,clean-jflex,-jflex-wiki-tokenizer,-jflex-ClassicAnalyzer,
+                                -jflex-UAX29URLEmailTokenizer,-jflex-HTMLStripCharFilter"/>
+
+  <target name="-jflex-HTMLStripCharFilter" depends="-install-jflex,generate-jflex-html-char-entities">
+    <run-jflex dir="src/java/org/apache/lucene/analysis/charfilter" name="HTMLStripCharFilter"/>
   </target>
 
   <target name="generate-jflex-html-char-entities">
@@ -58,17 +54,17 @@
     <fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>
   </target>
 
-  <target name="-jflex-wiki-tokenizer" depends="init,-install-jflex">
+  <target name="-jflex-wiki-tokenizer" depends="-install-jflex">
     <run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-UAX29URLEmailTokenizer" depends="init,-install-jflex">
-    <run-jflex-and-disable-buffer-expansion
-        dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
+  <target name="-jflex-ClassicAnalyzer" depends="-install-jflex">
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-ClassicAnalyzer" depends="init,-install-jflex">
-    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
+  <target name="-jflex-UAX29URLEmailTokenizer" depends="-install-jflex">
+    <run-jflex-and-disable-buffer-expansion
+        dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
   </target>
 
   <target name="clean-jflex">

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index a236497..ae67bde 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -95,127 +95,152 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     "\32\0\1\41\11\0\1\1\12\0\1\1\1\0\1\2\2\0\1\1"+
     "\5\0\27\1\1\0\37\1\1\0\u01ca\1\4\0\14\1\16\0\5\1"+
     "\7\0\1\1\1\0\1\1\21\0\160\2\5\1\1\0\2\1\2\0"+
-    "\4\1\10\0\1\1\1\2\3\1\1\0\1\1\1\0\24\1\1\0"+
-    "\123\1\1\0\213\1\1\0\5\2\2\0\236\1\11\0\46\1\2\0"+
-    "\1\1\7\0\47\1\11\0\55\2\1\0\1\2\1\0\2\2\1\0"+
-    "\2\2\1\0\1\2\10\0\33\1\5\0\3\1\35\0\13\2\5\0"+
-    "\53\1\25\2\12\111\4\0\2\1\1\2\143\1\1\0\1\1\7\2"+
-    "\2\0\6\2\2\1\2\2\1\0\4\2\2\1\12\111\3\1\2\0"+
-    "\1\1\20\0\1\1\1\2\36\1\33\2\2\0\131\1\13\2\1\1"+
-    "\16\0\12\111\41\1\11\2\2\1\4\0\1\1\5\0\26\1\4\2"+
-    "\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\104\0"+
-    "\1\1\1\0\13\1\67\0\33\2\1\0\4\2\66\1\3\2\1\1"+
-    "\22\2\1\1\7\2\12\1\2\2\2\0\12\111\1\0\7\1\1\0"+
-    "\7\1\1\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+
-    "\7\1\1\0\1\1\3\0\4\1\2\0\1\2\1\1\7\2\2\0"+
-    "\2\2\2\0\3\2\1\1\10\0\1\2\4\0\2\1\1\0\3\1"+
-    "\2\2\2\0\12\111\2\1\17\0\3\2\1\0\6\1\4\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\2\1\1\0\2\1"+
-    "\2\0\1\2\1\0\5\2\4\0\2\2\2\0\3\2\3\0\1\2"+
-    "\7\0\4\1\1\0\1\1\7\0\12\111\2\2\3\1\1\2\13\0"+
-    "\3\2\1\0\11\1\1\0\3\1\1\0\26\1\1\0\7\1\1\0"+
-    "\2\1\1\0\5\1\2\0\1\2\1\1\10\2\1\0\3\2\1\0"+
-    "\3\2\2\0\1\1\17\0\2\1\2\2\2\0\12\111\21\0\3\2"+
-    "\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1"+
-    "\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2\2\0\3\2"+
-    "\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0\12\111\1\0"+
-    "\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1\1\0\4\1"+
-    "\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1\3\0\3\1"+
-    "\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2\2\0\1\1"+
-    "\6\0\1\2\16\0\12\111\21\0\3\2\1\0\10\1\1\0\3\1"+
-    "\1\0\27\1\1\0\12\1\1\0\5\1\3\0\1\1\7\2\1\0"+
-    "\3\2\1\0\4\2\7\0\2\2\1\0\2\1\6\0\2\1\2\2"+
-    "\2\0\12\111\22\0\2\2\1\0\10\1\1\0\3\1\1\0\27\1"+
+    "\4\1\1\0\1\1\6\0\1\1\1\2\3\1\1\0\1\1\1\0"+
+    "\24\1\1\0\123\1\1\0\213\1\1\0\5\2\2\0\246\1\1\0"+
+    "\46\1\2\0\1\1\7\0\47\1\11\0\55\2\1\0\1\2\1\0"+
+    "\2\2\1\0\2\2\1\0\1\2\10\0\33\1\5\0\3\1\35\0"+
+    "\13\2\5\0\53\1\25\2\12\111\4\0\2\1\1\2\143\1\1\0"+
+    "\1\1\7\2\2\0\6\2\2\1\2\2\1\0\4\2\2\1\12\111"+
+    "\3\1\2\0\1\1\20\0\1\1\1\2\36\1\33\2\2\0\131\1"+
+    "\13\2\1\1\16\0\12\111\41\1\11\2\2\1\4\0\1\1\5\0"+
+    "\26\1\4\2\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1"+
+    "\3\2\104\0\25\1\1\0\10\1\26\0\16\2\1\0\41\2\66\1"+
+    "\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0\12\111\1\0"+
+    "\20\1\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
+    "\1\0\1\1\3\0\4\1\2\0\1\2\1\1\7\2\2\0\2\2"+
+    "\2\0\3\2\1\1\10\0\1\2\4\0\2\1\1\0\3\1\2\2"+
+    "\2\0\12\111\2\1\17\0\3\2\1\0\6\1\4\0\2\1\2\0"+
+    "\26\1\1\0\7\1\1\0\2\1\1\0\2\1\1\0\2\1\2\0"+
+    "\1\2\1\0\5\2\4\0\2\2\2\0\3\2\3\0\1\2\7\0"+
+    "\4\1\1\0\1\1\7\0\12\111\2\2\3\1\1\2\13\0\3\2"+
+    "\1\0\11\1\1\0\3\1\1\0\26\1\1\0\7\1\1\0\2\1"+
+    "\1\0\5\1\2\0\1\2\1\1\10\2\1\0\3\2\1\0\3\2"+
+    "\2\0\1\1\17\0\2\1\2\2\2\0\12\111\11\0\1\1\7\0"+
+    "\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0"+
+    "\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2\2\0"+
+    "\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0\12\111"+
+    "\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1\1\0"+
+    "\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1\3\0"+
+    "\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2\2\0"+
+    "\1\1\6\0\1\2\16\0\12\111\20\0\4\2\1\0\10\1\1\0"+
+    "\3\1\1\0\27\1\1\0\20\1\3\0\1\1\7\2\1\0\3\2"+
+    "\1\0\4\2\7\0\2\2\1\0\3\1\5\0\2\1\2\2\2\0"+
+    "\12\111\20\0\1\1\3\2\1\0\10\1\1\0\3\1\1\0\27\1"+
     "\1\0\12\1\1\0\5\1\2\0\1\2\1\1\7\2\1\0\3\2"+
     "\1\0\4\2\7\0\2\2\7\0\1\1\1\0\2\1\2\2\2\0"+
-    "\12\111\1\0\2\1\17\0\2\2\1\0\10\1\1\0\3\1\1\0"+
-    "\51\1\2\0\1\1\7\2\1\0\3\2\1\0\4\2\1\1\10\0"+
-    "\1\2\10\0\2\1\2\2\2\0\12\111\12\0\6\1\2\0\2\2"+
-    "\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0\7\1"+
-    "\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\22\0\2\2"+
-    "\15\0\60\1\1\2\2\1\7\2\5\0\7\1\10\2\1\0\12\111"+
-    "\47\0\2\1\1\0\1\1\2\0\2\1\1\0\1\1\2\0\1\1"+
-    "\6\0\4\1\1\0\7\1\1\0\3\1\1\0\1\1\1\0\1\1"+
-    "\2\0\2\1\1\0\4\1\1\2\2\1\6\2\1\0\2\2\1\1"+
-    "\2\0\5\1\1\0\1\1\1\0\6\2\2\0\12\111\2\0\4\1"+
-    "\40\0\1\1\27\0\2\2\6\0\12\111\13\0\1\2\1\0\1\2"+
-    "\1\0\1\2\4\0\2\2\10\1\1\0\44\1\4\0\24\2\1\0"+
-    "\2\2\5\1\13\2\1\0\44\2\11\0\1\2\71\0\53\1\24\2"+
-    "\1\1\12\111\6\0\6\1\4\2\4\1\3\2\1\1\3\2\2\1"+
-    "\7\2\3\1\4\2\15\1\14\2\1\1\1\2\12\111\4\2\2\0"+
-    "\46\1\1\0\1\1\5\0\1\1\2\0\53\1\1\0\u014d\1\1\0"+
-    "\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0"+
-    "\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0"+
-    "\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0"+
-    "\3\2\11\0\11\2\16\0\20\1\20\0\125\1\14\0\u026c\1\2\0"+
-    "\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+
-    "\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+
-    "\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+
-    "\1\1\1\2\2\0\12\111\41\0\3\2\2\0\12\111\6\0\130\1"+
-    "\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+
-    "\4\0\14\2\12\0\12\111\36\1\2\0\5\1\13\0\54\1\4\0"+
-    "\21\2\7\1\2\2\6\0\12\111\1\2\45\0\27\1\5\2\4\0"+
-    "\65\1\12\2\1\0\35\2\2\0\1\2\12\111\6\0\12\111\15\0"+
-    "\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\111\21\0\11\2"+
-    "\14\0\3\2\36\1\15\2\2\1\12\111\54\1\16\2\14\0\44\1"+
-    "\24\2\10\0\12\111\3\0\3\1\12\111\44\1\122\0\3\2\1\0"+
-    "\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2\25\0"+
-    "\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1"+
-    "\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1"+
-    "\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1"+
-    "\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41"+
-    "\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41"+
-    "\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2"+
-    "\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1"+
-    "\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1"+
-    "\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1"+
-    "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1"+
-    "\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2"+
-    "\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0"+
-    "\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1"+
-    "\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0"+
-    "\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0"+
-    "\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\111"+
-    "\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0\1\2"+
-    "\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
-    "\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2\27\1"+
-    "\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\111\6\0"+
-    "\22\2\6\1\3\0\1\1\4\0\12\111\34\1\10\2\2\0\27\1"+
-    "\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\111"+
-    "\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\111"+
-    "\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1\3\2"+
-    "\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1\2\0"+
-    "\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1\2\0"+
-    "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+
-    "\2\0\12\111\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u016e\1"+
-    "\2\0\152\1\46\0\7\1\14\0\5\1\5\0\1\1\1\2\12\1"+
-    "\1\0\15\1\1\0\5\1\1\0\1\1\1\0\2\1\1\0\2\1"+
-    "\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1"+
-    "\4\0\20\2\20\0\7\2\14\0\2\2\30\0\3\2\40\0\5\1"+
-    "\1\0\207\1\23\0\12\111\7\0\32\1\4\0\1\2\1\0\32\1"+
-    "\13\0\131\1\3\0\6\1\2\0\6\1\2\0\6\1\2\0\3\1"+
-    "\43\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1\1\0\17\1"+
-    "\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\2\202\0\35\1"+
-    "\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1\2\0\44\1"+
-    "\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\111\u0356\0\6\1"+
-    "\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1\2\0\27\1"+
-    "\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1\100\0\1\1"+
-    "\3\2\1\0\2\2\5\0\4\2\4\1\1\0\3\1\1\0\33\1"+
-    "\4\0\3\2\4\0\1\2\40\0\35\1\203\0\66\1\12\0\26\1"+
-    "\12\0\23\1\215\0\111\1\u03b7\0\3\2\65\1\17\2\37\0\12\111"+
-    "\20\0\3\2\55\1\13\2\25\0\31\1\7\0\12\111\6\0\3\2"+
-    "\44\1\16\2\1\0\12\111\100\0\3\2\60\1\16\2\4\1\13\0"+
-    "\12\111\u04a6\0\53\1\15\2\10\0\12\111\u0936\0\u036f\1\221\0\143\1"+
-    "\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1\13\0\1\1\56\2\20\0"+
-    "\4\2\15\1\u4060\0\2\1\u2163\0\5\2\3\0\6\2\10\0\10\2"+
-    "\2\0\7\2\36\0\4\2\224\0\3\2\u01bb\0\125\1\1\0\107\1"+
-    "\1\0\2\1\2\0\1\1\2\0\2\1\2\0\4\1\1\0\14\1"+
-    "\1\0\1\1\1\0\7\1\1\0\101\1\1\0\4\1\2\0\10\1"+
-    "\1\0\7\1\1\0\34\1\1\0\4\1\1\0\5\1\1\0\1\1"+
-    "\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0\31\1\1\0\37\1"+
-    "\1\0\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1"+
-    "\1\0\37\1\1\0\31\1\1\0\10\1\2\0\62\111\u1600\0\4\1"+
+    "\12\111\1\0\2\1\16\0\3\2\1\0\10\1\1\0\3\1\1\0"+
+    "\51\1\2\0\1\1\7\2\1\0\3\2\1\0\4\2\1\1\5\0"+
+    "\3\1\1\2\7\0\3\1\2\2\2\0\12\111\12\0\6\1\2\0"+
+    "\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0"+
+    "\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\6\0"+
+    "\12\111\2\0\2\2\15\0\60\1\1\2\2\1\7\2\5\0\7\1"+
+    "\10\2\1\0\12\111\47\0\2\1\1\0\1\1\2\0\2\1\1\0"+
+    "\1\1\2\0\1\1\6\0\4\1\1\0\7\1\1\0\3\1\1\0"+
+    "\1\1\1\0\1\1\2\0\2\1\1\0\4\1\1\2\2\1\6\2"+
+    "\1\0\2\2\1\1\2\0\5\1\1\0\1\1\1\0\6\2\2\0"+
+    "\12\111\2\0\4\1\40\0\1\1\27\0\2\2\6\0\12\111\13\0"+
+    "\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1"+
+    "\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0\1\2"+
+    "\71\0\53\1\24\2\1\1\12\111\6\0\6\1\4\2\4\1\3\2"+
+    "\1\1\3\2\2\1\7\2\3\1\4\2\15\1\14\2\1\1\1\2"+
+    "\12\111\4\2\2\0\46\1\1\0\1\1\5\0\1\1\2\0\53\1"+
+    "\1\0\u014d\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1"+
+    "\2\0\51\1\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1"+
+    "\1\0\1\1\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1"+
+    "\2\0\103\1\2\0\3\2\11\0\11\2\16\0\20\1\20\0\126\1"+
+    "\2\0\6\1\3\0\u026c\1\2\0\21\1\1\41\32\1\5\0\113\1"+
+    "\3\0\13\1\7\0\15\1\1\0\4\1\3\2\13\0\22\1\3\2"+
+    "\13\0\22\1\2\2\14\0\15\1\1\0\3\1\1\0\2\2\14\0"+
+    "\64\1\40\2\3\0\1\1\4\0\1\1\1\2\2\0\12\111\41\0"+
+    "\3\2\2\0\12\111\6\0\130\1\10\0\51\1\1\2\1\1\5\0"+
+    "\106\1\12\0\37\1\1\0\14\2\4\0\14\2\12\0\12\111\36\1"+
+    "\2\0\5\1\13\0\54\1\4\0\32\1\6\0\12\111\1\2\45\0"+
+    "\27\1\5\2\4\0\65\1\12\2\1\0\35\2\2\0\1\2\12\111"+
+    "\6\0\12\111\15\0\1\1\10\0\16\2\102\0\5\2\57\1\21\2"+
+    "\7\1\4\0\12\111\21\0\11\2\14\0\3\2\36\1\15\2\2\1"+
+    "\12\111\54\1\16\2\14\0\44\1\24\2\10\0\12\111\3\0\3\1"+
+    "\12\111\44\1\2\0\11\1\107\0\3\2\1\0\25\2\4\1\1\2"+
+    "\4\1\3\2\2\1\1\0\2\2\6\0\300\1\66\2\5\0\5\2"+
+    "\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0"+
+    "\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0"+
+    "\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0"+
+    "\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41\35\0"+
+    "\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41\21\0"+
+    "\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2\3\0"+
+    "\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1\2\0"+
+    "\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1\2\0"+
+    "\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1\1\0"+
+    "\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1\1\0"+
+    "\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2\27\1"+
+    "\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
+    "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0\1\41"+
+    "\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1\4\0"+
+    "\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0\51\1"+
+    "\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0\u51d6\1"+
+    "\52\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\111\2\1"+
+    "\24\0\57\1\1\2\4\0\12\2\1\0\37\1\2\2\120\1\2\2"+
+    "\45\0\11\1\2\0\147\1\2\0\44\1\1\0\10\1\77\0\13\1"+
+    "\1\2\3\1\1\2\4\1\1\2\27\1\5\2\30\0\64\1\14\0"+
+    "\2\2\62\1\22\2\12\0\12\111\6\0\22\2\6\1\3\0\1\1"+
+    "\1\0\1\1\2\0\12\111\34\1\10\2\2\0\27\1\15\2\14\0"+
+    "\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\111\6\0\5\1"+
+    "\1\2\12\1\12\111\5\1\1\0\51\1\16\2\11\0\3\1\1\2"+
+    "\10\1\2\2\2\0\12\111\6\0\27\1\3\0\1\1\3\2\62\1"+
+    "\1\2\1\1\3\2\2\1\2\2\5\1\2\2\1\1\1\2\1\1"+
+    "\30\0\3\1\2\0\13\1\5\2\2\0\3\1\2\2\12\0\6\1"+
+    "\2\0\6\1\2\0\6\1\11\0\7\1\1\0\7\1\1\0\53\1"+
+    "\1\0\12\1\12\0\163\1\10\2\1\0\2\2\2\0\12\111\6\0"+
+    "\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u016e\1\2\0\152\1\46\0"+
+    "\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1\1\0"+
+    "\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1\41\0"+
+    "\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2\20\0"+
+    "\20\2\3\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1\23\0"+
+    "\12\111\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1\3\0"+
+    "\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0\14\1\1\0"+
+    "\32\1\1\0\23\1\1\0\2\1\1\0\17\1\2\0\16\1\42\0"+
+    "\173\1\105\0\65\1\210\0\1\2\202\0\35\1\3\0\61\1\17\0"+
+    "\1\2\37\0\40\1\20\0\33\1\5\0\46\1\5\2\5\0\36\1"+
+    "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\111"+
+    "\6\0\44\1\4\0\44\1\4\0\50\1\10\0\64\1\234\0\u0137\1"+
+    "\11\0\26\1\12\0\10\1\230\0\6\1\2\0\1\1\1\0\54\1"+
+    "\1\0\2\1\3\0\1\1\2\0\27\1\12\0\27\1\11\0\37\1"+
+    "\101\0\23\1\1\0\2\1\12\0\26\1\12\0\32\1\106\0\70\1"+
+    "\6\0\2\1\100\0\1\1\3\2\1\0\2\2\5\0\4\2\4\1"+
+    "\1\0\3\1\1\0\33\1\4\0\3\2\4\0\1\2\40\0\35\1"+
+    "\3\0\35\1\43\0\10\1\1\0\34\1\2\2\31\0\66\1\12\0"+
+    "\26\1\12\0\23\1\15\0\22\1\156\0\111\1\67\0\63\1\15\0"+
+    "\63\1\u030d\0\3\2\65\1\17\2\37\0\12\111\17\0\4\2\55\1"+
+    "\13\2\25\0\31\1\7\0\12\111\6\0\3\2\44\1\16\2\1\0"+
+    "\12\111\20\0\43\1\1\2\2\0\1\1\11\0\3\2\60\1\16\2"+
+    "\4\1\5\0\3\2\3\0\12\111\1\1\1\0\1\1\43\0\22\1"+
+    "\1\0\31\1\14\2\6\0\1\2\101\0\7\1\1\0\1\1\1\0"+
+    "\4\1\1\0\17\1\1\0\12\1\7\0\57\1\14\2\5\0\12\111"+
+    "\6\0\4\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
+    "\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2"+
+    "\2\0\3\2\2\0\1\1\6\0\1\2\5\0\5\1\2\2\2\0"+
+    "\7\2\3\0\5\2\213\0\65\1\22\2\4\1\5\0\12\111\46\0"+
+    "\60\1\24\2\2\1\1\0\1\1\10\0\12\111\246\0\57\1\7\2"+
+    "\2\0\11\2\27\0\4\1\2\2\42\0\60\1\21\2\3\0\1\1"+
+    "\13\0\12\111\46\0\53\1\15\2\10\0\12\111\66\0\32\1\3\0"+
+    "\17\2\4\0\12\111\u0166\0\100\1\12\111\25\0\1\1\u01c0\0\71\1"+
+    "\u0107\0\11\1\1\0\45\1\10\2\1\0\10\2\1\1\17\0\12\111"+
+    "\30\0\36\1\2\0\26\2\1\0\16\2\u0349\0\u039a\1\146\0\157\1"+
+    "\21\0\304\1\u0abc\0\u042f\1\u0fd1\0\u0247\1\u21b9\0\u0239\1\7\0\37\1"+
+    "\1\0\12\111\146\0\36\1\2\0\5\2\13\0\60\1\7\2\11\0"+
+    "\4\1\14\0\12\111\11\0\25\1\5\0\23\1\u0370\0\105\1\13\0"+
+    "\1\1\56\2\20\0\4\2\15\1\100\0\1\1\37\0\u17ed\1\23\0"+
+    "\u02f3\1\u250d\0\2\1\u0bfe\0\153\1\5\0\15\1\3\0\11\1\7\0"+
+    "\12\1\3\0\2\2\u14c6\0\5\2\3\0\6\2\10\0\10\2\2\0"+
+    "\7\2\36\0\4\2\224\0\3\2\u01bb\0\125\1\1\0\107\1\1\0"+
+    "\2\1\2\0\1\1\2\0\2\1\2\0\4\1\1\0\14\1\1\0"+
+    "\1\1\1\0\7\1\1\0\101\1\1\0\4\1\2\0\10\1\1\0"+
+    "\7\1\1\0\34\1\1\0\4\1\1\0\5\1\1\0\1\1\3\0"+
+    "\7\1\1\0\u0154\1\2\0\31\1\1\0\31\1\1\0\37\1\1\0"+
+    "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
+    "\37\1\1\0\31\1\1\0\10\1\2\0\62\111\u0200\0\67\2\4\0"+
+    "\62\2\10\0\1\2\16\0\1\2\26\0\5\2\1\0\17\2\u0550\0"+
+    "\7\2\1\0\21\2\2\0\7\2\1\0\2\2\1\0\5\2\u07d5\0"+
+    "\305\1\13\0\7\2\51\0\104\1\7\2\5\0\12\111\u04a6\0\4\1"+
     "\1\0\33\1\1\0\2\1\1\0\1\1\2\0\1\1\1\0\12\1"+
     "\1\0\4\1\1\0\1\1\1\0\1\1\6\0\1\1\4\0\1\1"+
     "\1\0\1\1\1\0\1\1\1\0\3\1\1\0\2\1\1\0\1\1"+
@@ -223,7 +248,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     "\1\0\2\1\1\0\1\1\2\0\4\1\1\0\7\1\1\0\4\1"+
     "\1\0\4\1\1\0\1\1\1\0\12\1\1\0\21\1\5\0\3\1"+
     "\1\0\5\1\1\0\21\1\u1144\0\ua6d7\1\51\0\u1035\1\13\0\336\1"+
-    "\u3fe2\0\u021e\1\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u06ed\0"+
+    "\2\0\u1682\1\u295e\0\u021e\1\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u06ed\0"+
     "\360\2\uffff\0\uffff\0\ufe12\0";
 
   /** 
@@ -29654,7 +29679,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -29809,11 +29834,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -29950,24 +29975,14 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   private TextSegment entitySegment = new TextSegment(2);
 
   /**
-   * Creates a new HTMLStripCharFilter over the provided Reader.
-   * @param source Reader to strip html tags from.
-   */
-  public HTMLStripCharFilter(Reader source) {
-    super(source);
-    this.zzReader = source;
-  }
-
-  /**
    * Creates a new HTMLStripCharFilter over the provided Reader
    * with the specified start and end tags.
-   * @param source Reader to strip html tags from.
+   * @param in Reader to strip html tags from.
    * @param escapedTags Tags in this set (both start and end tags)
    *  will not be filtered out.
    */
-  public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
-    super(source);
-    this.zzReader = source;
+  public HTMLStripCharFilter(Reader in, Set<String> escapedTags) {
+    this(in);
     if (null != escapedTags) {
       for (String tag : escapedTags) {
         if (tag.equalsIgnoreCase("BR")) {
@@ -30059,7 +30074,15 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   }
 
 
-  
+  /**
+   * Creates a new scanner
+   *
+   * @param   in  the java.io.Reader to read input from.
+   */
+  public HTMLStripCharFilter(java.io.Reader in) {
+    super(in);
+    this.zzReader = in;
+  }
 
 
   /** 
@@ -30072,7 +30095,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     char [] map = new char[0x110000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2836) {
+    while (i < 3340) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -30116,28 +30139,29 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -30420,43 +30444,55 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { if (yylength() == 1) {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+            zzDoEOF();
+          {   return eofReturnValue;
+ }
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { if (yylength() == 1) {
     return zzBuffer[zzStartRead];
   } else {
     outputSegment.append(yytext()); return outputSegment.nextChar();
   }
-          }
-        case 55: break;
-        case 2: 
-          { inputStart = yychar;
+            } 
+            // fall through
+          case 55: break;
+          case 2: 
+            { inputStart = yychar;
   inputSegment.clear();
   inputSegment.append('<');
   yybegin(LEFT_ANGLE_BRACKET);
-          }
-        case 56: break;
-        case 3: 
-          { inputStart = yychar;
+            } 
+            // fall through
+          case 56: break;
+          case 3: 
+            { inputStart = yychar;
   inputSegment.clear();
   inputSegment.append('&');
   yybegin(AMPERSAND);
-          }
-        case 57: break;
-        case 4: 
-          { yypushback(yylength());
+            } 
+            // fall through
+          case 57: break;
+          case 4: 
+            { yypushback(yylength());
     outputSegment = inputSegment;
     outputSegment.restart();
     yybegin(YYINITIAL);
     return outputSegment.nextChar();
-          }
-        case 58: break;
-        case 5: 
-          { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
-          }
-        case 59: break;
-        case 6: 
-          { int matchLength = yylength();
+            } 
+            // fall through
+          case 58: break;
+          case 5: 
+            { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
+            } 
+            // fall through
+          case 59: break;
+          case 6: 
+            { int matchLength = yylength();
     inputSegment.write(zzBuffer, zzStartRead, matchLength);
     if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
       String decimalCharRef = yytext();
@@ -30487,180 +30523,206 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       yybegin(YYINITIAL);
       return outputSegment.nextChar();
     }
-          }
-        case 60: break;
-        case 7: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 60: break;
+          case 7: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
     yybegin(YYINITIAL);
     return outputSegment.nextChar();
-          }
-        case 61: break;
-        case 8: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 61: break;
+          case 8: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(START_TAG_TAIL_INCLUDE);
     } else {
       yybegin(START_TAG_TAIL_SUBSTITUTE);
     }
-          }
-        case 62: break;
-        case 9: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 62: break;
+          case 9: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(START_TAG_TAIL_INCLUDE);
     } else {
       yybegin(START_TAG_TAIL_EXCLUDE);
     }
-          }
-        case 63: break;
-        case 10: 
-          { inputSegment.append('!'); yybegin(BANG);
-          }
-        case 64: break;
-        case 11: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 63: break;
+          case 10: 
+            { inputSegment.append('!'); yybegin(BANG);
+            } 
+            // fall through
+          case 64: break;
+          case 11: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     yybegin(LEFT_ANGLE_BRACKET_SPACE);
-          }
-        case 65: break;
-        case 12: 
-          { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
-          }
-        case 66: break;
-        case 13: 
-          { inputSegment.append(yytext());
-          }
-        case 67: break;
-        case 14: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 65: break;
+          case 12: 
+            { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+            } 
+            // fall through
+          case 66: break;
+          case 13: 
+            { inputSegment.append(yytext());
+            } 
+            // fall through
+          case 67: break;
+          case 14: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += inputSegment.length() + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
-          }
-        case 68: break;
-        case 15: 
-          { 
-          }
-        case 69: break;
-        case 16: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 70: break;
-        case 17: 
-          { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 71: break;
-        case 18: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 68: break;
+          case 15: 
+            { 
+            } 
+            // fall through
+          case 69: break;
+          case 16: 
+            { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 70: break;
+          case 17: 
+            { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 71: break;
+          case 18: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(END_TAG_TAIL_INCLUDE);
     } else {
       yybegin(END_TAG_TAIL_SUBSTITUTE);
     }
-          }
-        case 72: break;
-        case 19: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 72: break;
+          case 19: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(END_TAG_TAIL_INCLUDE);
     } else {
       yybegin(END_TAG_TAIL_EXCLUDE);
     }
-          }
-        case 73: break;
-        case 20: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
-          }
-        case 74: break;
-        case 21: 
-          { if (yylength() == 1) {
+            } 
+            // fall through
+          case 73: break;
+          case 20: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 74: break;
+          case 21: 
+            { if (yylength() == 1) {
       return zzBuffer[zzStartRead];
     } else {
       outputSegment.append(yytext()); return outputSegment.nextChar();
     }
-          }
-        case 75: break;
-        case 22: 
-          { previousRestoreState = restoreState;
+            } 
+            // fall through
+          case 75: break;
+          case 22: 
+            { previousRestoreState = restoreState;
     restoreState = SERVER_SIDE_INCLUDE;
     yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 76: break;
-        case 23: 
-          { previousRestoreState = restoreState;
+            } 
+            // fall through
+          case 76: break;
+          case 23: 
+            { previousRestoreState = restoreState;
     restoreState = SERVER_SIDE_INCLUDE;
     yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 77: break;
-        case 24: 
-          { yybegin(restoreState); restoreState = previousRestoreState;
-          }
-        case 78: break;
-        case 25: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 77: break;
+          case 24: 
+            { yybegin(restoreState); restoreState = previousRestoreState;
+            } 
+            // fall through
+          case 78: break;
+          case 25: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
      outputSegment = inputSegment;
      yybegin(YYINITIAL);
      return outputSegment.nextChar();
-          }
-        case 79: break;
-        case 26: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 79: break;
+          case 26: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - 1;
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
     return BLOCK_LEVEL_END_TAG_REPLACEMENT;
-          }
-        case 80: break;
-        case 27: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 80: break;
+          case 27: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += inputSegment.length() + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     outputSegment = inputSegment;
     yybegin(YYINITIAL);
-          }
-        case 81: break;
-        case 28: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 81: break;
+          case 28: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - 1;
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
     return BLOCK_LEVEL_START_TAG_REPLACEMENT;
-          }
-        case 82: break;
-        case 29: 
-          { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 83: break;
-        case 30: 
-          { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 84: break;
-        case 31: 
-          { int length = yylength();
+            } 
+            // fall through
+          case 82: break;
+          case 29: 
+            { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 83: break;
+          case 30: 
+            { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 84: break;
+          case 31: 
+            { int length = yylength();
     inputSegment.write(zzBuffer, zzStartRead, length);
     entitySegment.clear();
     char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
     entitySegment.append(ch);
     outputSegment = entitySegment;
     yybegin(CHARACTER_REFERENCE_TAIL);
-          }
-        case 85: break;
-        case 32: 
-          { int matchLength = yylength();
+            } 
+            // fall through
+          case 85: break;
+          case 32: 
+            { int matchLength = yylength();
     inputSegment.write(zzBuffer, zzStartRead, matchLength);
     if (matchLength <= 6) { // 10FFFF: max 6 hex chars
       String hexCharRef
@@ -30692,18 +30754,20 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       yybegin(YYINITIAL);
       return outputSegment.nextChar();
     }
-          }
-        case 86: break;
-        case 33: 
-          { if (inputSegment.length() > 2) { // Chars between "<!" and "--" - this is not a comment
+            } 
+            // fall through
+          case 86: break;
+          case 33: 
+            { if (inputSegment.length() > 2) { // Chars between "<!" and "--" - this is not a comment
       inputSegment.append(yytext());
     } else {
       yybegin(COMMENT);
     }
-          }
-        case 87: break;
-        case 34: 
-          { yybegin(YYINITIAL);
+            } 
+            // fall through
+          case 87: break;
+          case 34: 
+            { yybegin(YYINITIAL);
     if (escapeBR) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
@@ -30716,23 +30780,26 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.reset();
       return BR_START_TAG_REPLACEMENT;
     }
-          }
-        case 88: break;
-        case 35: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
+            } 
+            // fall through
+          case 88: break;
+          case 35: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
     cumulativeDiff += yychar - inputStart + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
-          }
-        case 89: break;
-        case 36: 
-          { yybegin(SCRIPT);
-          }
-        case 90: break;
-        case 37: 
-          { yybegin(YYINITIAL);
+            } 
+            // fall through
+          case 89: break;
+          case 36: 
+            { yybegin(SCRIPT);
+            } 
+            // fall through
+          case 90: break;
+          case 37: 
+            { yybegin(YYINITIAL);
     if (escapeBR) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
@@ -30745,66 +30812,77 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.reset();
       return BR_END_TAG_REPLACEMENT;
     }
-          }
-        case 91: break;
-        case 38: 
-          { // add (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 91: break;
+          case 38: 
+            { // add (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     yybegin(YYINITIAL);
-          }
-        case 92: break;
-        case 39: 
-          { yybegin(restoreState);
-          }
-        case 93: break;
-        case 40: 
-          { yybegin(STYLE);
-          }
-        case 94: break;
-        case 41: 
-          { yybegin(SCRIPT_COMMENT);
-          }
-        case 95: break;
-        case 42: 
-          { yybegin(STYLE_COMMENT);
-          }
-        case 96: break;
-        case 43: 
-          { restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 97: break;
-        case 44: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 98: break;
-        case 45: 
-          { restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 99: break;
-        case 46: 
-          { yybegin(STYLE);
+            } 
+            // fall through
+          case 92: break;
+          case 39: 
+            { yybegin(restoreState);
+            } 
+            // fall through
+          case 93: break;
+          case 40: 
+            { yybegin(STYLE);
+            } 
+            // fall through
+          case 94: break;
+          case 41: 
+            { yybegin(SCRIPT_COMMENT);
+            } 
+            // fall through
+          case 95: break;
+          case 42: 
+            { yybegin(STYLE_COMMENT);
+            } 
+            // fall through
+          case 96: break;
+          case 43: 
+            { restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 97: break;
+          case 44: 
+            { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 98: break;
+          case 45: 
+            { restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 99: break;
+          case 46: 
+            { yybegin(STYLE);
     if (escapeSTYLE) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
       inputStart += 1 + yylength();
       return outputSegment.nextChar();
     }
-          }
-        case 100: break;
-        case 47: 
-          { yybegin(SCRIPT);
+            } 
+            // fall through
+          case 100: break;
+          case 47: 
+            { yybegin(SCRIPT);
     if (escapeSCRIPT) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
       inputStart += 1 + yylength();
       return outputSegment.nextChar();
     }
-          }
-        case 101: break;
-        case 48: 
-          { if (inputSegment.length() > 2) { // Chars between "<!" and "[CDATA[" - this is not a CDATA section
+            } 
+            // fall through
+          case 101: break;
+          case 48: 
+            { if (inputSegment.length() > 2) { // Chars between "<!" and "[CDATA[" - this is not a CDATA section
       inputSegment.append(yytext());
     } else {
       // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
@@ -30814,10 +30892,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.clear();
       yybegin(CDATA);
     }
-          }
-        case 102: break;
-        case 49: 
-          { inputSegment.clear();
+            } 
+            // fall through
+          case 102: break;
+          case 49: 
+            { inputSegment.clear();
     yybegin(YYINITIAL);
     // add (previously matched input length) -- current match and substitution handled below
     cumulativeDiff += yychar - inputStart;
@@ -30837,10 +30916,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
     addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
     return returnValue;
-          }
-        case 103: break;
-        case 50: 
-          { inputSegment.clear();
+            } 
+            // fall through
+          case 103: break;
+          case 50: 
+            { inputSegment.clear();
     yybegin(YYINITIAL);
     // add (previously matched input length) -- current match and substitution handled below
     cumulativeDiff += yychar - inputStart;
@@ -30860,10 +30940,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
     addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
     return returnValue;
-          }
-        case 104: break;
-        case 51: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 104: break;
+          case 51: 
+            { // Handle paired UTF-16 surrogates.
     outputSegment = entitySegment;
     outputSegment.clear();
     String surrogatePair = yytext();
@@ -30888,10 +30969,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     inputSegment.clear();
     yybegin(YYINITIAL);
     return highSurrogate;
-          }
-        case 105: break;
-        case 52: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 105: break;
+          case 52: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     char lowSurrogate = '\u0000';
@@ -30922,10 +31004,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 106: break;
-        case 53: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 106: break;
+          case 53: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     try { // High surrogates are in decimal range [55296, 56319]
@@ -30955,10 +31038,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 107: break;
-        case 54: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 107: break;
+          case 54: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     try { // High surrogates are in decimal range [55296, 56319]
@@ -30991,18 +31075,12 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 108: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            zzDoEOF();
-              {   return eofReturnValue;
- }
-          } 
-          else {
+            } 
+            // fall through
+          case 108: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
index d810d79..8b83de0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %apiprivate
 %type int
 %final
@@ -50,6 +50,10 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 %xstate START_TAG_TAIL_INCLUDE, START_TAG_TAIL_EXCLUDE, START_TAG_TAIL_SUBSTITUTE
 %xstate STYLE, STYLE_COMMENT
 
+%init{
+  super(in);
+%init}
+
 // From XML 1.0 <http://www.w3.org/TR/xml/>:
 //
 //    [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [...]
@@ -166,24 +170,14 @@ InlineElment = ( [aAbBiIqQsSuU]                   |
   private TextSegment entitySegment = new TextSegment(2);
 
   /**
-   * Creates a new HTMLStripCharFilter over the provided Reader.
-   * @param source Reader to strip html tags from.
-   */
-  public HTMLStripCharFilter(Reader source) {
-    super(source);
-    this.zzReader = source;
-  }
-
-  /**
    * Creates a new HTMLStripCharFilter over the provided Reader
    * with the specified start and end tags.
-   * @param source Reader to strip html tags from.
+   * @param in Reader to strip html tags from.
    * @param escapedTags Tags in this set (both start and end tags)
    *  will not be filtered out.
    */
-  public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
-    super(source);
-    this.zzReader = source;
+  public HTMLStripCharFilter(Reader in, Set<String> escapedTags) {
+    this(in);
     if (null != escapedTags) {
       for (String tag : escapedTags) {
         if (tag.equalsIgnoreCase("BR")) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
index 7e5105d..31d3d96 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -251,7 +251,7 @@ class ClassicTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -323,11 +323,11 @@ class ClassicTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -436,28 +436,29 @@ public final void getText(CharTermAttribute t) {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -681,55 +682,65 @@ public final void getText(CharTermAttribute t) {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
-          }
-        case 11: break;
-        case 2: 
-          { return ALPHANUM;
-          }
-        case 12: break;
-        case 3: 
-          { return CJ;
-          }
-        case 13: break;
-        case 4: 
-          { return HOST;
-          }
-        case 14: break;
-        case 5: 
-          { return NUM;
-          }
-        case 15: break;
-        case 6: 
-          { return APOSTROPHE;
-          }
-        case 16: break;
-        case 7: 
-          { return COMPANY;
-          }
-        case 17: break;
-        case 8: 
-          { return ACRONYM_DEP;
-          }
-        case 18: break;
-        case 9: 
-          { return ACRONYM;
-          }
-        case 19: break;
-        case 10: 
-          { return EMAIL;
-          }
-        case 20: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            return YYEOF;
-          } 
-          else {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+        return YYEOF;
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+            } 
+            // fall through
+          case 11: break;
+          case 2: 
+            { return ALPHANUM;
+            } 
+            // fall through
+          case 12: break;
+          case 3: 
+            { return CJ;
+            } 
+            // fall through
+          case 13: break;
+          case 4: 
+            { return HOST;
+            } 
+            // fall through
+          case 14: break;
+          case 5: 
+            { return NUM;
+            } 
+            // fall through
+          case 15: break;
+          case 6: 
+            { return APOSTROPHE;
+            } 
+            // fall through
+          case 16: break;
+          case 7: 
+            { return COMPANY;
+            } 
+            // fall through
+          case 17: break;
+          case 8: 
+            { return ACRONYM_DEP;
+            } 
+            // fall through
+          case 18: break;
+          case 9: 
+            { return ACRONYM;
+            } 
+            // fall through
+          case 19: break;
+          case 10: 
+            { return EMAIL;
+            } 
+            // fall through
+          case 20: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
index 842ae51..65848f2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
@@ -32,33 +32,32 @@ import org.apache.lucene.util.AttributeFactory;
  * algorithm, as specified in 
  * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a> 
  * URLs and email addresses are also tokenized according to the relevant RFCs.
- * <p>
- * Tokens produced are of the following types:
- * <ul>
- *   <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
- *   <li>&lt;NUM&gt;: A number</li>
- *   <li>&lt;URL&gt;: A URL</li>
- *   <li>&lt;EMAIL&gt;: An email address</li>
- *   <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
- *       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- *   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
- *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
- * </ul>
  */
 
 public final class UAX29URLEmailTokenizer extends Tokenizer {
   /** A private instance of the JFlex-constructed scanner */
   private final UAX29URLEmailTokenizerImpl scanner;
-  
-  public static final int ALPHANUM          = 0;
-  public static final int NUM               = 1;
-  public static final int SOUTHEAST_ASIAN   = 2;
-  public static final int IDEOGRAPHIC       = 3;
-  public static final int HIRAGANA          = 4;
-  public static final int KATAKANA          = 5;
-  public static final int HANGUL            = 6;
-  public static final int URL               = 7;
-  public static final int EMAIL             = 8;
+
+  /** Alpha/numeric token type */
+  public static final int ALPHANUM = 0;
+  /** Numeric token type */
+  public static final int NUM = 1;
+  /** Southeast Asian token type */
+  public static final int SOUTHEAST_ASIAN = 2;
+  /** Ideographic token type */
+  public static final int IDEOGRAPHIC = 3;
+  /** Hiragana token type */
+  public static final int HIRAGANA = 4;
+  /** Katakana token type */
+  public static final int KATAKANA = 5;
+  /** Hangul token type */
+  public static final int HANGUL = 6;
+  /** URL token type */
+  public static final int URL = 7;
+  /** Email token type */
+  public static final int EMAIL = 8;
+  /** Emoji token type. */
+  public static final int EMOJI = 9;
 
   /** String token types that correspond to token type int constants */
   public static final String [] TOKEN_TYPES = new String [] {
@@ -71,6 +70,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
     StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL],
     "<URL>",
     "<EMAIL>",
+    StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI]
   };
 
   /** Absolute maximum sized token */


[12/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
new file mode 100644
index 0000000..5e99ef4
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
@@ -0,0 +1,10756 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by generateEmojiTokenizationTest.pl
+ * from: http://www.unicode.org/Public/emoji/11.0/emoji-test.txt
+ *
+ * emoji-test.txt contains emoji char sequences, which are represented as
+ * tokenization tests in this class.
+ * 
+ */
+@Ignore
+public class EmojiTokenizationTestUnicode_11_0 extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+    for (int i = 0 ; i < tests.length ; i += 2) {
+      String test = tests[i + 1];
+      try {
+        assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
+      } catch (Throwable t) {
+        throw new Exception("Failed to tokenize \"" + tests[i] + "\":", t);        
+      }
+    }
+  }
+
+  private String[] tests = new String[] {
+    "1F600                                      ; fully-qualified     # 😀 grinning face",
+    "\uD83D\uDE00",
+
+    "1F601                                      ; fully-qualified     # 😁 beaming face with smiling eyes",
+    "\uD83D\uDE01",
+
+    "1F602                                      ; fully-qualified     # 😂 face with tears of joy",
+    "\uD83D\uDE02",
+
+    "1F923                                      ; fully-qualified     # 🤣 rolling on the floor laughing",
+    "\uD83E\uDD23",
+
+    "1F603                                      ; fully-qualified     # ๐Ÿ˜ƒ grinning face with big eyes",
+    "\uD83D\uDE03",
+
+    "1F604                                      ; fully-qualified     # ๐Ÿ˜„ grinning face with smiling eyes",
+    "\uD83D\uDE04",
+
+    "1F605                                      ; fully-qualified     # ๐Ÿ˜… grinning face with sweat",
+    "\uD83D\uDE05",
+
+    "1F606                                      ; fully-qualified     # ๐Ÿ˜† grinning squinting face",
+    "\uD83D\uDE06",
+
+    "1F609                                      ; fully-qualified     # ๐Ÿ˜‰ winking face",
+    "\uD83D\uDE09",
+
+    "1F60A                                      ; fully-qualified     # ๐Ÿ˜Š smiling face with smiling eyes",
+    "\uD83D\uDE0A",
+
+    "1F60B                                      ; fully-qualified     # ๐Ÿ˜‹ face savoring food",
+    "\uD83D\uDE0B",
+
+    "1F60E                                      ; fully-qualified     # ๐Ÿ˜Ž smiling face with sunglasses",
+    "\uD83D\uDE0E",
+
+    "1F60D                                      ; fully-qualified     # ๐Ÿ˜ smiling face with heart-eyes",
+    "\uD83D\uDE0D",
+
+    "1F618                                      ; fully-qualified     # ๐Ÿ˜˜ face blowing a kiss",
+    "\uD83D\uDE18",
+
+    "1F970                                      ; fully-qualified     # ๐Ÿฅฐ smiling face with 3 hearts",
+    "\uD83E\uDD70",
+
+    "1F617                                      ; fully-qualified     # ๐Ÿ˜— kissing face",
+    "\uD83D\uDE17",
+
+    "1F619                                      ; fully-qualified     # ๐Ÿ˜™ kissing face with smiling eyes",
+    "\uD83D\uDE19",
+
+    "1F61A                                      ; fully-qualified     # ๐Ÿ˜š kissing face with closed eyes",
+    "\uD83D\uDE1A",
+
+    "263A FE0F                                  ; fully-qualified     # ☺️ smiling face",
+    "\u263A\uFE0F",
+
+    "263A                                       ; non-fully-qualified # ☺ smiling face",
+    "\u263A",
+
+    "1F642                                      ; fully-qualified     # ๐Ÿ™‚ slightly smiling face",
+    "\uD83D\uDE42",
+
+    "1F917                                      ; fully-qualified     # ๐Ÿค— hugging face",
+    "\uD83E\uDD17",
+
+    "1F929                                      ; fully-qualified     # ๐Ÿคฉ star-struck",
+    "\uD83E\uDD29",
+
+    "1F914                                      ; fully-qualified     # ๐Ÿค” thinking face",
+    "\uD83E\uDD14",
+
+    "1F928                                      ; fully-qualified     # ๐Ÿคจ face with raised eyebrow",
+    "\uD83E\uDD28",
+
+    "1F610                                      ; fully-qualified     # ๐Ÿ˜ neutral face",
+    "\uD83D\uDE10",
+
+    "1F611                                      ; fully-qualified     # ๐Ÿ˜‘ expressionless face",
+    "\uD83D\uDE11",
+
+    "1F636                                      ; fully-qualified     # ๐Ÿ˜ถ face without mouth",
+    "\uD83D\uDE36",
+
+    "1F644                                      ; fully-qualified     # ๐Ÿ™„ face with rolling eyes",
+    "\uD83D\uDE44",
+
+    "1F60F                                      ; fully-qualified     # ๐Ÿ˜ smirking face",
+    "\uD83D\uDE0F",
+
+    "1F623                                      ; fully-qualified     # ๐Ÿ˜ฃ persevering face",
+    "\uD83D\uDE23",
+
+    "1F625                                      ; fully-qualified     # ๐Ÿ˜ฅ sad but relieved face",
+    "\uD83D\uDE25",
+
+    "1F62E                                      ; fully-qualified     # ๐Ÿ˜ฎ face with open mouth",
+    "\uD83D\uDE2E",
+
+    "1F910                                      ; fully-qualified     # ๐Ÿค zipper-mouth face",
+    "\uD83E\uDD10",
+
+    "1F62F                                      ; fully-qualified     # ๐Ÿ˜ฏ hushed face",
+    "\uD83D\uDE2F",
+
+    "1F62A                                      ; fully-qualified     # ๐Ÿ˜ช sleepy face",
+    "\uD83D\uDE2A",
+
+    "1F62B                                      ; fully-qualified     # ๐Ÿ˜ซ tired face",
+    "\uD83D\uDE2B",
+
+    "1F634                                      ; fully-qualified     # ๐Ÿ˜ด sleeping face",
+    "\uD83D\uDE34",
+
+    "1F60C                                      ; fully-qualified     # ๐Ÿ˜Œ relieved face",
+    "\uD83D\uDE0C",
+
+    "1F61B                                      ; fully-qualified     # ๐Ÿ˜› face with tongue",
+    "\uD83D\uDE1B",
+
+    "1F61C                                      ; fully-qualified     # ๐Ÿ˜œ winking face with tongue",
+    "\uD83D\uDE1C",
+
+    "1F61D                                      ; fully-qualified     # ๐Ÿ˜ squinting face with tongue",
+    "\uD83D\uDE1D",
+
+    "1F924                                      ; fully-qualified     # ๐Ÿคค drooling face",
+    "\uD83E\uDD24",
+
+    "1F612                                      ; fully-qualified     # ๐Ÿ˜’ unamused face",
+    "\uD83D\uDE12",
+
+    "1F613                                      ; fully-qualified     # ๐Ÿ˜“ downcast face with sweat",
+    "\uD83D\uDE13",
+
+    "1F614                                      ; fully-qualified     # ๐Ÿ˜” pensive face",
+    "\uD83D\uDE14",
+
+    "1F615                                      ; fully-qualified     # ๐Ÿ˜• confused face",
+    "\uD83D\uDE15",
+
+    "1F643                                      ; fully-qualified     # ๐Ÿ™ƒ upside-down face",
+    "\uD83D\uDE43",
+
+    "1F911                                      ; fully-qualified     # ๐Ÿค‘ money-mouth face",
+    "\uD83E\uDD11",
+
+    "1F632                                      ; fully-qualified     # ๐Ÿ˜ฒ astonished face",
+    "\uD83D\uDE32",
+
+    "2639 FE0F                                  ; fully-qualified     # ☹️ frowning face",
+    "\u2639\uFE0F",
+
+    "2639                                       ; non-fully-qualified # โ˜น frowning face",
+    "\u2639",
+
+    "1F641                                      ; fully-qualified     # ๐Ÿ™ slightly frowning face",
+    "\uD83D\uDE41",
+
+    "1F616                                      ; fully-qualified     # ๐Ÿ˜– confounded face",
+    "\uD83D\uDE16",
+
+    "1F61E                                      ; fully-qualified     # ๐Ÿ˜ž disappointed face",
+    "\uD83D\uDE1E",
+
+    "1F61F                                      ; fully-qualified     # ๐Ÿ˜Ÿ worried face",
+    "\uD83D\uDE1F",
+
+    "1F624                                      ; fully-qualified     # ๐Ÿ˜ค face with steam from nose",
+    "\uD83D\uDE24",
+
+    "1F622                                      ; fully-qualified     # ๐Ÿ˜ข crying face",
+    "\uD83D\uDE22",
+
+    "1F62D                                      ; fully-qualified     # ๐Ÿ˜ญ loudly crying face",
+    "\uD83D\uDE2D",
+
+    "1F626                                      ; fully-qualified     # ๐Ÿ˜ฆ frowning face with open mouth",
+    "\uD83D\uDE26",
+
+    "1F627                                      ; fully-qualified     # ๐Ÿ˜ง anguished face",
+    "\uD83D\uDE27",
+
+    "1F628                                      ; fully-qualified     # ๐Ÿ˜จ fearful face",
+    "\uD83D\uDE28",
+
+    "1F629                                      ; fully-qualified     # ๐Ÿ˜ฉ weary face",
+    "\uD83D\uDE29",
+
+    "1F92F                                      ; fully-qualified     # ๐Ÿคฏ exploding head",
+    "\uD83E\uDD2F",
+
+    "1F62C                                      ; fully-qualified     # ๐Ÿ˜ฌ grimacing face",
+    "\uD83D\uDE2C",
+
+    "1F630                                      ; fully-qualified     # ๐Ÿ˜ฐ anxious face with sweat",
+    "\uD83D\uDE30",
+
+    "1F631                                      ; fully-qualified     # ๐Ÿ˜ฑ face screaming in fear",
+    "\uD83D\uDE31",
+
+    "1F975                                      ; fully-qualified     # ๐Ÿฅต hot face",
+    "\uD83E\uDD75",
+
+    "1F976                                      ; fully-qualified     # ๐Ÿฅถ cold face",
+    "\uD83E\uDD76",
+
+    "1F633                                      ; fully-qualified     # ๐Ÿ˜ณ flushed face",
+    "\uD83D\uDE33",
+
+    "1F92A                                      ; fully-qualified     # ๐Ÿคช zany face",
+    "\uD83E\uDD2A",
+
+    "1F635                                      ; fully-qualified     # ๐Ÿ˜ต dizzy face",
+    "\uD83D\uDE35",
+
+    "1F621                                      ; fully-qualified     # ๐Ÿ˜ก pouting face",
+    "\uD83D\uDE21",
+
+    "1F620                                      ; fully-qualified     # ๐Ÿ˜  angry face",
+    "\uD83D\uDE20",
+
+    "1F92C                                      ; fully-qualified     # ๐Ÿคฌ face with symbols on mouth",
+    "\uD83E\uDD2C",
+
+    "1F637                                      ; fully-qualified     # ๐Ÿ˜ท face with medical mask",
+    "\uD83D\uDE37",
+
+    "1F912                                      ; fully-qualified     # ๐Ÿค’ face with thermometer",
+    "\uD83E\uDD12",
+
+    "1F915                                      ; fully-qualified     # ๐Ÿค• face with head-bandage",
+    "\uD83E\uDD15",
+
+    "1F922                                      ; fully-qualified     # ๐Ÿคข nauseated face",
+    "\uD83E\uDD22",
+
+    "1F92E                                      ; fully-qualified     # ๐Ÿคฎ face vomiting",
+    "\uD83E\uDD2E",
+
+    "1F927                                      ; fully-qualified     # ๐Ÿคง sneezing face",
+    "\uD83E\uDD27",
+
+    "1F607                                      ; fully-qualified     # ๐Ÿ˜‡ smiling face with halo",
+    "\uD83D\uDE07",
+
+    "1F920                                      ; fully-qualified     # ๐Ÿค  cowboy hat face",
+    "\uD83E\uDD20",
+
+    "1F973                                      ; fully-qualified     # ๐Ÿฅณ partying face",
+    "\uD83E\uDD73",
+
+    "1F974                                      ; fully-qualified     # ๐Ÿฅด woozy face",
+    "\uD83E\uDD74",
+
+    "1F97A                                      ; fully-qualified     # ๐Ÿฅบ pleading face",
+    "\uD83E\uDD7A",
+
+    "1F925                                      ; fully-qualified     # ๐Ÿคฅ lying face",
+    "\uD83E\uDD25",
+
+    "1F92B                                      ; fully-qualified     # ๐Ÿคซ shushing face",
+    "\uD83E\uDD2B",
+
+    "1F92D                                      ; fully-qualified     # ๐Ÿคญ face with hand over mouth",
+    "\uD83E\uDD2D",
+
+    "1F9D0                                      ; fully-qualified     # ๐Ÿง face with monocle",
+    "\uD83E\uDDD0",
+
+    "1F913                                      ; fully-qualified     # ๐Ÿค“ nerd face",
+    "\uD83E\uDD13",
+
+    "1F608                                      ; fully-qualified     # ๐Ÿ˜ˆ smiling face with horns",
+    "\uD83D\uDE08",
+
+    "1F47F                                      ; fully-qualified     # ๐Ÿ‘ฟ angry face with horns",
+    "\uD83D\uDC7F",
+
+    "1F921                                      ; fully-qualified     # ๐Ÿคก clown face",
+    "\uD83E\uDD21",
+
+    "1F479                                      ; fully-qualified     # ๐Ÿ‘น ogre",
+    "\uD83D\uDC79",
+
+    "1F47A                                      ; fully-qualified     # ๐Ÿ‘บ goblin",
+    "\uD83D\uDC7A",
+
+    "1F480                                      ; fully-qualified     # ๐Ÿ’€ skull",
+    "\uD83D\uDC80",
+
+    "2620 FE0F                                  ; fully-qualified     # โ˜ ๏ธ skull and crossbones",
+    "\u2620\uFE0F",
+
+    "2620                                       ; non-fully-qualified # โ˜  skull and crossbones",
+    "\u2620",
+
+    "1F47B                                      ; fully-qualified     # ๐Ÿ‘ป ghost",
+    "\uD83D\uDC7B",
+
+    "1F47D                                      ; fully-qualified     # ๐Ÿ‘ฝ alien",
+    "\uD83D\uDC7D",
+
+    "1F47E                                      ; fully-qualified     # ๐Ÿ‘พ alien monster",
+    "\uD83D\uDC7E",
+
+    "1F916                                      ; fully-qualified     # ๐Ÿค– robot face",
+    "\uD83E\uDD16",
+
+    "1F4A9                                      ; fully-qualified     # ๐Ÿ’ฉ pile of poo",
+    "\uD83D\uDCA9",
+
+    "1F63A                                      ; fully-qualified     # ๐Ÿ˜บ grinning cat face",
+    "\uD83D\uDE3A",
+
+    "1F638                                      ; fully-qualified     # ๐Ÿ˜ธ grinning cat face with smiling eyes",
+    "\uD83D\uDE38",
+
+    "1F639                                      ; fully-qualified     # ๐Ÿ˜น cat face with tears of joy",
+    "\uD83D\uDE39",
+
+    "1F63B                                      ; fully-qualified     # ๐Ÿ˜ป smiling cat face with heart-eyes",
+    "\uD83D\uDE3B",
+
+    "1F63C                                      ; fully-qualified     # ๐Ÿ˜ผ cat face with wry smile",
+    "\uD83D\uDE3C",
+
+    "1F63D                                      ; fully-qualified     # ๐Ÿ˜ฝ kissing cat face",
+    "\uD83D\uDE3D",
+
+    "1F640                                      ; fully-qualified     # ๐Ÿ™€ weary cat face",
+    "\uD83D\uDE40",
+
+    "1F63F                                      ; fully-qualified     # ๐Ÿ˜ฟ crying cat face",
+    "\uD83D\uDE3F",
+
+    "1F63E                                      ; fully-qualified     # ๐Ÿ˜พ pouting cat face",
+    "\uD83D\uDE3E",
+
+    "1F648                                      ; fully-qualified     # ๐Ÿ™ˆ see-no-evil monkey",
+    "\uD83D\uDE48",
+
+    "1F649                                      ; fully-qualified     # ๐Ÿ™‰ hear-no-evil monkey",
+    "\uD83D\uDE49",
+
+    "1F64A                                      ; fully-qualified     # ๐Ÿ™Š speak-no-evil monkey",
+    "\uD83D\uDE4A",
+
+    "1F3FB                                      ; fully-qualified     # 🏻 light skin tone",
+    "\uD83C\uDFFB",
+
+    "1F3FC                                      ; fully-qualified     # ๐Ÿผ medium-light skin tone",
+    "\uD83C\uDFFC",
+
+    "1F3FD                                      ; fully-qualified     # ๐Ÿฝ medium skin tone",
+    "\uD83C\uDFFD",
+
+    "1F3FE                                      ; fully-qualified     # ๐Ÿพ medium-dark skin tone",
+    "\uD83C\uDFFE",
+
+    "1F3FF                                      ; fully-qualified     # ๐Ÿฟ dark skin tone",
+    "\uD83C\uDFFF",
+
+    "1F476                                      ; fully-qualified     # ๐Ÿ‘ถ baby",
+    "\uD83D\uDC76",
+
+    "1F476 1F3FB                                ; fully-qualified     # 👶🏻 baby: light skin tone",
+    "\uD83D\uDC76\uD83C\uDFFB",
+
+    "1F476 1F3FC                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿผ baby: medium-light skin tone",
+    "\uD83D\uDC76\uD83C\uDFFC",
+
+    "1F476 1F3FD                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿฝ baby: medium skin tone",
+    "\uD83D\uDC76\uD83C\uDFFD",
+
+    "1F476 1F3FE                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿพ baby: medium-dark skin tone",
+    "\uD83D\uDC76\uD83C\uDFFE",
+
+    "1F476 1F3FF                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿฟ baby: dark skin tone",
+    "\uD83D\uDC76\uD83C\uDFFF",
+
+    "1F9D2                                      ; fully-qualified     # ๐Ÿง’ child",
+    "\uD83E\uDDD2",
+
+    "1F9D2 1F3FB                                ; fully-qualified     # ๐Ÿง’๐Ÿป child: light skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFB",
+
+    "1F9D2 1F3FC                                ; fully-qualified     # ๐Ÿง’๐Ÿผ child: medium-light skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFC",
+
+    "1F9D2 1F3FD                                ; fully-qualified     # ๐Ÿง’๐Ÿฝ child: medium skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFD",
+
+    "1F9D2 1F3FE                                ; fully-qualified     # ๐Ÿง’๐Ÿพ child: medium-dark skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFE",
+
+    "1F9D2 1F3FF                                ; fully-qualified     # ๐Ÿง’๐Ÿฟ child: dark skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFF",
+
+    "1F466                                      ; fully-qualified     # ๐Ÿ‘ฆ boy",
+    "\uD83D\uDC66",
+
+    "1F466 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿป boy: light skin tone",
+    "\uD83D\uDC66\uD83C\uDFFB",
+
+    "1F466 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿผ boy: medium-light skin tone",
+    "\uD83D\uDC66\uD83C\uDFFC",
+
+    "1F466 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿฝ boy: medium skin tone",
+    "\uD83D\uDC66\uD83C\uDFFD",
+
+    "1F466 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿพ boy: medium-dark skin tone",
+    "\uD83D\uDC66\uD83C\uDFFE",
+
+    "1F466 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿฟ boy: dark skin tone",
+    "\uD83D\uDC66\uD83C\uDFFF",
+
+    "1F467                                      ; fully-qualified     # ๐Ÿ‘ง girl",
+    "\uD83D\uDC67",
+
+    "1F467 1F3FB                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿป girl: light skin tone",
+    "\uD83D\uDC67\uD83C\uDFFB",
+
+    "1F467 1F3FC                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿผ girl: medium-light skin tone",
+    "\uD83D\uDC67\uD83C\uDFFC",
+
+    "1F467 1F3FD                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿฝ girl: medium skin tone",
+    "\uD83D\uDC67\uD83C\uDFFD",
+
+    "1F467 1F3FE                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿพ girl: medium-dark skin tone",
+    "\uD83D\uDC67\uD83C\uDFFE",
+
+    "1F467 1F3FF                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿฟ girl: dark skin tone",
+    "\uD83D\uDC67\uD83C\uDFFF",
+
+    "1F9D1                                      ; fully-qualified     # ๐Ÿง‘ adult",
+    "\uD83E\uDDD1",
+
+    "1F9D1 1F3FB                                ; fully-qualified     # ๐Ÿง‘๐Ÿป adult: light skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFB",
+
+    "1F9D1 1F3FC                                ; fully-qualified     # ๐Ÿง‘๐Ÿผ adult: medium-light skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFC",
+
+    "1F9D1 1F3FD                                ; fully-qualified     # ๐Ÿง‘๐Ÿฝ adult: medium skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFD",
+
+    "1F9D1 1F3FE                                ; fully-qualified     # ๐Ÿง‘๐Ÿพ adult: medium-dark skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFE",
+
+    "1F9D1 1F3FF                                ; fully-qualified     # ๐Ÿง‘๐Ÿฟ adult: dark skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFF",
+
+    "1F468                                      ; fully-qualified     # ๐Ÿ‘จ man",
+    "\uD83D\uDC68",
+
+    "1F468 1F3FB                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿป man: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB",
+
+    "1F468 1F3FC                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿผ man: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC",
+
+    "1F468 1F3FD                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝ man: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD",
+
+    "1F468 1F3FE                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿพ man: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE",
+
+    "1F468 1F3FF                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟ man: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF",
+
+    "1F469                                      ; fully-qualified     # ๐Ÿ‘ฉ woman",
+    "\uD83D\uDC69",
+
+    "1F469 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿป woman: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB",
+
+    "1F469 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผ woman: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC",
+
+    "1F469 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝ woman: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD",
+
+    "1F469 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพ woman: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE",
+
+    "1F469 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟ woman: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF",
+
+    "1F9D3                                      ; fully-qualified     # ๐Ÿง“ older adult",
+    "\uD83E\uDDD3",
+
+    "1F9D3 1F3FB                                ; fully-qualified     # ๐Ÿง“๐Ÿป older adult: light skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFB",
+
+    "1F9D3 1F3FC                                ; fully-qualified     # ๐Ÿง“๐Ÿผ older adult: medium-light skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFC",
+
+    "1F9D3 1F3FD                                ; fully-qualified     # ๐Ÿง“๐Ÿฝ older adult: medium skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFD",
+
+    "1F9D3 1F3FE                                ; fully-qualified     # ๐Ÿง“๐Ÿพ older adult: medium-dark skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFE",
+
+    "1F9D3 1F3FF                                ; fully-qualified     # ๐Ÿง“๐Ÿฟ older adult: dark skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFF",
+
+    "1F474                                      ; fully-qualified     # ๐Ÿ‘ด old man",
+    "\uD83D\uDC74",
+
+    "1F474 1F3FB                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿป old man: light skin tone",
+    "\uD83D\uDC74\uD83C\uDFFB",
+
+    "1F474 1F3FC                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿผ old man: medium-light skin tone",
+    "\uD83D\uDC74\uD83C\uDFFC",
+
+    "1F474 1F3FD                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿฝ old man: medium skin tone",
+    "\uD83D\uDC74\uD83C\uDFFD",
+
+    "1F474 1F3FE                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿพ old man: medium-dark skin tone",
+    "\uD83D\uDC74\uD83C\uDFFE",
+
+    "1F474 1F3FF                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿฟ old man: dark skin tone",
+    "\uD83D\uDC74\uD83C\uDFFF",
+
+    "1F475                                      ; fully-qualified     # ๐Ÿ‘ต old woman",
+    "\uD83D\uDC75",
+
+    "1F475 1F3FB                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿป old woman: light skin tone",
+    "\uD83D\uDC75\uD83C\uDFFB",
+
+    "1F475 1F3FC                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿผ old woman: medium-light skin tone",
+    "\uD83D\uDC75\uD83C\uDFFC",
+
+    "1F475 1F3FD                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿฝ old woman: medium skin tone",
+    "\uD83D\uDC75\uD83C\uDFFD",
+
+    "1F475 1F3FE                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿพ old woman: medium-dark skin tone",
+    "\uD83D\uDC75\uD83C\uDFFE",
+
+    "1F475 1F3FF                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿฟ old woman: dark skin tone",
+    "\uD83D\uDC75\uD83C\uDFFF",
+
+    "1F468 200D 2695 FE0F                       ; fully-qualified     # 👨‍⚕️ man health worker",
+    "\uD83D\uDC68\u200D\u2695\uFE0F",
+
+    "1F468 200D 2695                            ; non-fully-qualified # ๐Ÿ‘จโ€โš• man health worker",
+    "\uD83D\uDC68\u200D\u2695",
+
+    "1F468 1F3FB 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โš•๏ธ man health worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2695\uFE0F",
+
+    "1F468 1F3FB 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โš• man health worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2695",
+
+    "1F468 1F3FC 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โš•๏ธ man health worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695\uFE0F",
+
+    "1F468 1F3FC 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โš• man health worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695",
+
+    "1F468 1F3FD 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โš•๏ธ man health worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2695\uFE0F",
+
+    "1F468 1F3FD 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โš• man health worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2695",
+
+    "1F468 1F3FE 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โš•๏ธ man health worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2695\uFE0F",
+
+    "1F468 1F3FE 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โš• man health worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2695",
+
+    "1F468 1F3FF 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โš•๏ธ man health worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2695\uFE0F",
+
+    "1F468 1F3FF 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โš• man health worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2695",
+
+    "1F469 200D 2695 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โš•๏ธ woman health worker",
+    "\uD83D\uDC69\u200D\u2695\uFE0F",
+
+    "1F469 200D 2695                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โš• woman health worker",
+    "\uD83D\uDC69\u200D\u2695",
+
+    "1F469 1F3FB 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โš•๏ธ woman health worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2695\uFE0F",
+
+    "1F469 1F3FB 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โš• woman health worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2695",
+
+    "1F469 1F3FC 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โš•๏ธ woman health worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2695\uFE0F",
+
+    "1F469 1F3FC 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โš• woman health worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2695",
+
+    "1F469 1F3FD 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โš•๏ธ woman health worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2695\uFE0F",
+
+    "1F469 1F3FD 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โš• woman health worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2695",
+
+    "1F469 1F3FE 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โš•๏ธ woman health worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2695\uFE0F",
+
+    "1F469 1F3FE 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โš• woman health worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2695",
+
+    "1F469 1F3FF 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โš•๏ธ woman health worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2695\uFE0F",
+
+    "1F469 1F3FF 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โš• woman health worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2695",
+
+    "1F468 200D 1F393                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽ“ man student",
+    "\uD83D\uDC68\u200D\uD83C\uDF93",
+
+    "1F468 1F3FB 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽ“ man student: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF93",
+
+    "1F468 1F3FC 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽ“ man student: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF93",
+
+    "1F468 1F3FD 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽ“ man student: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF93",
+
+    "1F468 1F3FE 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽ“ man student: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF93",
+
+    "1F468 1F3FF 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽ“ man student: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF93",
+
+    "1F469 200D 1F393                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽ“ woman student",
+    "\uD83D\uDC69\u200D\uD83C\uDF93",
+
+    "1F469 1F3FB 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽ“ woman student: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF93",
+
+    "1F469 1F3FC 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽ“ woman student: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF93",
+
+    "1F469 1F3FD 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽ“ woman student: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF93",
+
+    "1F469 1F3FE 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽ“ woman student: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF93",
+
+    "1F469 1F3FF 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽ“ woman student: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF93",
+
+    "1F468 200D 1F3EB                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿซ man teacher",
+    "\uD83D\uDC68\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FB 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿซ man teacher: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FC 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿซ man teacher: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FD 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿซ man teacher: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FE 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿซ man teacher: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FF 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿซ man teacher: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFEB",
+
+    "1F469 200D 1F3EB                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿซ woman teacher",
+    "\uD83D\uDC69\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FB 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿซ woman teacher: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FC 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿซ woman teacher: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FD 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿซ woman teacher: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FE 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿซ woman teacher: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FF 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿซ woman teacher: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFEB",
+
+    "1F468 200D 2696 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โš–๏ธ man judge",
+    "\uD83D\uDC68\u200D\u2696\uFE0F",
+
+    "1F468 200D 2696                            ; non-fully-qualified # ๐Ÿ‘จโ€โš– man judge",
+    "\uD83D\uDC68\u200D\u2696",
+
+    "1F468 1F3FB 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โš–๏ธ man judge: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2696\uFE0F",
+
+    "1F468 1F3FB 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โš– man judge: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2696",
+
+    "1F468 1F3FC 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โš–๏ธ man judge: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2696\uFE0F",
+
+    "1F468 1F3FC 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โš– man judge: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2696",
+
+    "1F468 1F3FD 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โš–๏ธ man judge: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2696\uFE0F",
+
+    "1F468 1F3FD 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โš– man judge: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2696",
+
+    "1F468 1F3FE 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โš–๏ธ man judge: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2696\uFE0F",
+
+    "1F468 1F3FE 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โš– man judge: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2696",
+
+    "1F468 1F3FF 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โš–๏ธ man judge: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2696\uFE0F",
+
+    "1F468 1F3FF 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โš– man judge: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2696",
+
+    "1F469 200D 2696 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โš–๏ธ woman judge",
+    "\uD83D\uDC69\u200D\u2696\uFE0F",
+
+    "1F469 200D 2696                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โš– woman judge",
+    "\uD83D\uDC69\u200D\u2696",
+
+    "1F469 1F3FB 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โš–๏ธ woman judge: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2696\uFE0F",
+
+    "1F469 1F3FB 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โš– woman judge: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2696",
+
+    "1F469 1F3FC 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โš–๏ธ woman judge: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2696\uFE0F",
+
+    "1F469 1F3FC 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โš– woman judge: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2696",
+
+    "1F469 1F3FD 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โš–๏ธ woman judge: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2696\uFE0F",
+
+    "1F469 1F3FD 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โš– woman judge: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2696",
+
+    "1F469 1F3FE 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โš–๏ธ woman judge: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2696\uFE0F",
+
+    "1F469 1F3FE 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โš– woman judge: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2696",
+
+    "1F469 1F3FF 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โš–๏ธ woman judge: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2696\uFE0F",
+
+    "1F469 1F3FF 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โš– woman judge: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2696",
+
+    "1F468 200D 1F33E                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŒพ man farmer",
+    "\uD83D\uDC68\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FB 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŒพ man farmer: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FC 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŒพ man farmer: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FD 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŒพ man farmer: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FE 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŒพ man farmer: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FF 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŒพ man farmer: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF3E",
+
+    "1F469 200D 1F33E                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŒพ woman farmer",
+    "\uD83D\uDC69\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FB 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŒพ woman farmer: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FC 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŒพ woman farmer: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FD 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŒพ woman farmer: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FE 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŒพ woman farmer: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FF 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŒพ woman farmer: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF3E",
+
+    "1F468 200D 1F373                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿณ man cook",
+    "\uD83D\uDC68\u200D\uD83C\uDF73",
+
+    "1F468 1F3FB 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿณ man cook: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF73",
+
+    "1F468 1F3FC 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿณ man cook: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF73",
+
+    "1F468 1F3FD 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿณ man cook: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF73",
+
+    "1F468 1F3FE 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿณ man cook: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF73",
+
+    "1F468 1F3FF 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿณ man cook: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF73",
+
+    "1F469 200D 1F373                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿณ woman cook",
+    "\uD83D\uDC69\u200D\uD83C\uDF73",
+
+    "1F469 1F3FB 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿณ woman cook: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF73",
+
+    "1F469 1F3FC 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿณ woman cook: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF73",
+
+    "1F469 1F3FD 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿณ woman cook: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF73",
+
+    "1F469 1F3FE 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿณ woman cook: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF73",
+
+    "1F469 1F3FF 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿณ woman cook: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF73",
+
+    "1F468 200D 1F527                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ”ง man mechanic",
+    "\uD83D\uDC68\u200D\uD83D\uDD27",
+
+    "1F468 1F3FB 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ”ง man mechanic: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDD27",
+
+    "1F468 1F3FC 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ”ง man mechanic: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDD27",
+
+    "1F468 1F3FD 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ”ง man mechanic: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDD27",
+
+    "1F468 1F3FE 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ”ง man mechanic: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDD27",
+
+    "1F468 1F3FF 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ”ง man mechanic: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDD27",
+
+    "1F469 200D 1F527                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ”ง woman mechanic",
+    "\uD83D\uDC69\u200D\uD83D\uDD27",
+
+    "1F469 1F3FB 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ”ง woman mechanic: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDD27",
+
+    "1F469 1F3FC 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ”ง woman mechanic: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDD27",
+
+    "1F469 1F3FD 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ”ง woman mechanic: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDD27",
+
+    "1F469 1F3FE 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ”ง woman mechanic: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDD27",
+
+    "1F469 1F3FF 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ”ง woman mechanic: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDD27",
+
+    "1F468 200D 1F3ED                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿญ man factory worker",
+    "\uD83D\uDC68\u200D\uD83C\uDFED",
+
+    "1F468 1F3FB 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿญ man factory worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFED",
+
+    "1F468 1F3FC 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿญ man factory worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFED",
+
+    "1F468 1F3FD 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿญ man factory worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFED",
+
+    "1F468 1F3FE 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿญ man factory worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFED",
+
+    "1F468 1F3FF 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿญ man factory worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFED",
+
+    "1F469 200D 1F3ED                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿญ woman factory worker",
+    "\uD83D\uDC69\u200D\uD83C\uDFED",
+
+    "1F469 1F3FB 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿญ woman factory worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFED",
+
+    "1F469 1F3FC 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿญ woman factory worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFED",
+
+    "1F469 1F3FD 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿญ woman factory worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFED",
+
+    "1F469 1F3FE 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿญ woman factory worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFED",
+
+    "1F469 1F3FF 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿญ woman factory worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFED",
+
+    "1F468 200D 1F4BC                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ’ผ man office worker",
+    "\uD83D\uDC68\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FB 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ’ผ man office worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FC 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ’ผ man office worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FD 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ’ผ man office worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FE 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ’ผ man office worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FF 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ’ผ man office worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDCBC",
+
+    "1F469 200D 1F4BC                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ’ผ woman office worker",
+    "\uD83D\uDC69\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FB 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ’ผ woman office worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FC 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ’ผ woman office worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FD 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ผ woman office worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FE 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ’ผ woman office worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FF 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ’ผ woman office worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDCBC",
+
+    "1F468 200D 1F52C                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ”ฌ man scientist",
+    "\uD83D\uDC68\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FB 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ”ฌ man scientist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FC 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ”ฌ man scientist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FD 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ”ฌ man scientist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FE 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ”ฌ man scientist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FF 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ”ฌ man scientist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDD2C",
+
+    "1F469 200D 1F52C                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ”ฌ woman scientist",
+    "\uD83D\uDC69\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FB 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ”ฌ woman scientist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FC 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ”ฌ woman scientist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FD 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ”ฌ woman scientist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FE 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ”ฌ woman scientist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FF 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ”ฌ woman scientist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDD2C",
+
+    "1F468 200D 1F4BB                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ’ป man technologist",
+    "\uD83D\uDC68\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FB 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ’ป man technologist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FC 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ’ป man technologist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FD 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ’ป man technologist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FE 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ’ป man technologist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FF 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ’ป man technologist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDCBB",
+
+    "1F469 200D 1F4BB                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ’ป woman technologist",
+    "\uD83D\uDC69\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FB 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ’ป woman technologist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FC 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ’ป woman technologist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FD 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป woman technologist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FE 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ’ป woman technologist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FF 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ’ป woman technologist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDCBB",
+
+    "1F468 200D 1F3A4                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽค man singer",
+    "\uD83D\uDC68\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FB 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽค man singer: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FC 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽค man singer: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FD 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽค man singer: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FE 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽค man singer: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FF 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽค man singer: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFA4",
+
+    "1F469 200D 1F3A4                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽค woman singer",
+    "\uD83D\uDC69\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FB 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽค woman singer: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FC 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽค woman singer: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FD 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽค woman singer: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FE 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽค woman singer: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FF 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽค woman singer: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFA4",
+
+    "1F468 200D 1F3A8                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽจ man artist",
+    "\uD83D\uDC68\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FB 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽจ man artist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FC 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽจ man artist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FD 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽจ man artist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FE 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽจ man artist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FF 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽจ man artist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFA8",
+
+    "1F469 200D 1F3A8                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽจ woman artist",
+    "\uD83D\uDC69\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FB 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽจ woman artist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FC 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽจ woman artist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FD 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽจ woman artist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FE 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽจ woman artist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FF 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽจ woman artist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFA8",
+
+    "1F468 200D 2708 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โœˆ๏ธ man pilot",
+    "\uD83D\uDC68\u200D\u2708\uFE0F",
+
+    "1F468 200D 2708                            ; non-fully-qualified # ๐Ÿ‘จโ€โœˆ man pilot",
+    "\uD83D\uDC68\u200D\u2708",
+
+    "1F468 1F3FB 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โœˆ๏ธ man pilot: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2708\uFE0F",
+
+    "1F468 1F3FB 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โœˆ man pilot: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2708",
+
+    "1F468 1F3FC 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โœˆ๏ธ man pilot: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2708\uFE0F",
+
+    "1F468 1F3FC 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โœˆ man pilot: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2708",
+
+    "1F468 1F3FD 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โœˆ๏ธ man pilot: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2708\uFE0F",
+
+    "1F468 1F3FD 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โœˆ man pilot: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2708",
+
+    "1F468 1F3FE 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โœˆ๏ธ man pilot: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2708\uFE0F",
+
+    "1F468 1F3FE 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โœˆ man pilot: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2708",
+
+    "1F468 1F3FF 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โœˆ๏ธ man pilot: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2708\uFE0F",
+
+    "1F468 1F3FF 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โœˆ man pilot: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2708",
+
+    "1F469 200D 2708 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โœˆ๏ธ woman pilot",
+    "\uD83D\uDC69\u200D\u2708\uFE0F",
+
+    "1F469 200D 2708                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โœˆ woman pilot",
+    "\uD83D\uDC69\u200D\u2708",
+
+    "1F469 1F3FB 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โœˆ๏ธ woman pilot: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2708\uFE0F",
+
+    "1F469 1F3FB 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โœˆ woman pilot: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2708",
+
+    "1F469 1F3FC 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โœˆ๏ธ woman pilot: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2708\uFE0F",
+
+    "1F469 1F3FC 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โœˆ woman pilot: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2708",
+
+    "1F469 1F3FD 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โœˆ๏ธ woman pilot: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2708\uFE0F",
+
+    "1F469 1F3FD 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โœˆ woman pilot: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2708",
+
+    "1F469 1F3FE 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โœˆ๏ธ woman pilot: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2708\uFE0F",
+
+    "1F469 1F3FE 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โœˆ woman pilot: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2708",
+
+    "1F469 1F3FF 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โœˆ๏ธ woman pilot: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2708\uFE0F",
+
+    "1F469 1F3FF 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โœˆ woman pilot: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2708",
+
+    "1F468 200D 1F680                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿš€ man astronaut",
+    "\uD83D\uDC68\u200D\uD83D\uDE80",
+
+    "1F468 1F3FB 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿš€ man astronaut: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDE80",
+
+    "1F468 1F3FC 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿš€ man astronaut: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDE80",
+
+    "1F468 1F3FD 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿš€ man astronaut: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDE80",
+
+    "1F468 1F3FE 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿš€ man astronaut: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDE80",
+
+    "1F468 1F3FF 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿš€ man astronaut: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDE80",
+
+    "1F469 200D 1F680                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿš€ woman astronaut",
+    "\uD83D\uDC69\u200D\uD83D\uDE80",
+
+    "1F469 1F3FB 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿš€ woman astronaut: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDE80",
+
+    "1F469 1F3FC 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿš€ woman astronaut: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDE80",
+
+    "1F469 1F3FD 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿš€ woman astronaut: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDE80",
+
+    "1F469 1F3FE 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿš€ woman astronaut: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDE80",
+
+    "1F469 1F3FF 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿš€ woman astronaut: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDE80",
+
+    "1F468 200D 1F692                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿš’ man firefighter",
+    "\uD83D\uDC68\u200D\uD83D\uDE92",
+
+    "1F468 1F3FB 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿš’ man firefighter: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDE92",
+
+    "1F468 1F3FC 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿš’ man firefighter: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDE92",
+
+    "1F468 1F3FD 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿš’ man firefighter: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDE92",
+
+    "1F468 1F3FE 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿš’ man firefighter: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDE92",
+
+    "1F468 1F3FF 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿš’ man firefighter: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDE92",
+
+    "1F469 200D 1F692                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿš’ woman firefighter",
+    "\uD83D\uDC69\u200D\uD83D\uDE92",
+
+    "1F469 1F3FB 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿš’ woman firefighter: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDE92",
+
+    "1F469 1F3FC 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿš’ woman firefighter: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDE92",
+
+    "1F469 1F3FD 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿš’ woman firefighter: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDE92",
+
+    "1F469 1F3FE 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿš’ woman firefighter: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDE92",
+
+    "1F469 1F3FF 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿš’ woman firefighter: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDE92",
+
+    "1F46E                                      ; fully-qualified     # ๐Ÿ‘ฎ police officer",
+    "\uD83D\uDC6E",
+
+    "1F46E 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿป police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB",
+
+    "1F46E 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผ police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC",
+
+    "1F46E 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝ police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD",
+
+    "1F46E 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพ police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE",
+
+    "1F46E 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟ police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF",
+
+    "1F46E 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ‘ฎโ€โ™‚๏ธ man police officer",
+    "\uD83D\uDC6E\u200D\u2642\uFE0F",
+
+    "1F46E 200D 2642                            ; non-fully-qualified # ๐Ÿ‘ฎโ€โ™‚ man police officer",
+    "\uD83D\uDC6E\u200D\u2642",
+
+    "1F46E 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿปโ€โ™‚๏ธ man police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿปโ€โ™‚ man police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2642",
+
+    "1F46E 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผโ€โ™‚๏ธ man police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿผโ€โ™‚ man police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2642",
+
+    "1F46E 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝโ€โ™‚๏ธ man police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฝโ€โ™‚ man police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2642",
+
+    "1F46E 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพโ€โ™‚๏ธ man police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿพโ€โ™‚ man police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2642",
+
+    "1F46E 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟโ€โ™‚๏ธ man police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฟโ€โ™‚ man police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2642",
+
+    "1F46E 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ‘ฎโ€โ™€๏ธ woman police officer",
+    "\uD83D\uDC6E\u200D\u2640\uFE0F",
+
+    "1F46E 200D 2640                            ; non-fully-qualified # ๐Ÿ‘ฎโ€โ™€ woman police officer",
+    "\uD83D\uDC6E\u200D\u2640",
+
+    "1F46E 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿปโ€โ™€๏ธ woman police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿปโ€โ™€ woman police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2640",
+
+    "1F46E 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผโ€โ™€๏ธ woman police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿผโ€โ™€ woman police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2640",
+
+    "1F46E 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝโ€โ™€๏ธ woman police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฝโ€โ™€ woman police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2640",
+
+    "1F46E 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพโ€โ™€๏ธ woman police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿพโ€โ™€ woman police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2640",
+
+    "1F46E 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ woman police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฟโ€โ™€ woman police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2640",
+
+    "1F575 FE0F                                 ; fully-qualified     # ๐Ÿ•ต๏ธ detective",
+    "\uD83D\uDD75\uFE0F",
+
+    "1F575                                      ; non-fully-qualified # ๐Ÿ•ต detective",
+    "\uD83D\uDD75",
+
+    "1F575 1F3FB                                ; fully-qualified     # ๐Ÿ•ต๐Ÿป detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB",
+
+    "1F575 1F3FC                                ; fully-qualified     # ๐Ÿ•ต๐Ÿผ detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC",
+
+    "1F575 1F3FD                                ; fully-qualified     # ๐Ÿ•ต๐Ÿฝ detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD",
+
+    "1F575 1F3FE                                ; fully-qualified     # ๐Ÿ•ต๐Ÿพ detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE",
+
+    "1F575 1F3FF                                ; fully-qualified     # ๐Ÿ•ต๐Ÿฟ detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF",
+
+    "1F575 FE0F 200D 2642 FE0F                  ; fully-qualified     # ๐Ÿ•ต๏ธโ€โ™‚๏ธ man detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2642\uFE0F",
+
+    "1F575 200D 2642 FE0F                       ; non-fully-qualified # ๐Ÿ•ตโ€โ™‚๏ธ man detective",
+    "\uD83D\uDD75\u200D\u2642\uFE0F",
+
+    "1F575 FE0F 200D 2642                       ; non-fully-qualified # ๐Ÿ•ต๏ธโ€โ™‚ man detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2642",
+
+    "1F575 200D 2642                            ; non-fully-qualified # ๐Ÿ•ตโ€โ™‚ man detective",
+    "\uD83D\uDD75\u200D\u2642",
+
+    "1F575 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿปโ€โ™‚๏ธ man detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F575 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿปโ€โ™‚ man detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2642",
+
+    "1F575 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿผโ€โ™‚๏ธ man detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F575 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿผโ€โ™‚ man detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2642",
+
+    "1F575 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฝโ€โ™‚๏ธ man detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F575 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฝโ€โ™‚ man detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2642",
+
+    "1F575 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿพโ€โ™‚๏ธ man detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F575 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿพโ€โ™‚ man detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2642",
+
+    "1F575 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฟโ€โ™‚๏ธ man detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F575 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฟโ€โ™‚ man detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2642",
+
+    "1F575 FE0F 200D 2640 FE0F                  ; fully-qualified     # ๐Ÿ•ต๏ธโ€โ™€๏ธ woman detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2640\uFE0F",
+
+    "1F575 200D 2640 FE0F                       ; non-fully-qualified # ๐Ÿ•ตโ€โ™€๏ธ woman detective",
+    "\uD83D\uDD75\u200D\u2640\uFE0F",
+
+    "1F575 FE0F 200D 2640                       ; non-fully-qualified # ๐Ÿ•ต๏ธโ€โ™€ woman detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2640",
+
+    "1F575 200D 2640                            ; non-fully-qualified # ๐Ÿ•ตโ€โ™€ woman detective",
+    "\uD83D\uDD75\u200D\u2640",
+
+    "1F575 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿปโ€โ™€๏ธ woman detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F575 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿปโ€โ™€ woman detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2640",
+
+    "1F575 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿผโ€โ™€๏ธ woman detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F575 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿผโ€โ™€ woman detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2640",
+
+    "1F575 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฝโ€โ™€๏ธ woman detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F575 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฝโ€โ™€ woman detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2640",
+
+    "1F575 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿพโ€โ™€๏ธ woman detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F575 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿพโ€โ™€ woman detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2640",
+
+    "1F575 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฟโ€โ™€๏ธ woman detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F575 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฟโ€โ™€ woman detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2640",
+
+    "1F482                                      ; fully-qualified     # ๐Ÿ’‚ guard",
+    "\uD83D\uDC82",
+
+    "1F482 1F3FB                                ; fully-qualified     # ๐Ÿ’‚๐Ÿป guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB",
+
+    "1F482 1F3FC                                ; fully-qualified     # ๐Ÿ’‚๐Ÿผ guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC",
+
+    "1F482 1F3FD                                ; fully-qualified     # ๐Ÿ’‚๐Ÿฝ guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD",
+
+    "1F482 1F3FE                                ; fully-qualified     # ๐Ÿ’‚๐Ÿพ guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE",
+
+    "1F482 1F3FF                                ; fully-qualified     # ๐Ÿ’‚๐Ÿฟ guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF",
+
+    "1F482 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ’‚โ€โ™‚๏ธ man guard",
+    "\uD83D\uDC82\u200D\u2642\uFE0F",
+
+    "1F482 200D 2642                            ; non-fully-qualified # ๐Ÿ’‚โ€โ™‚ man guard",
+    "\uD83D\uDC82\u200D\u2642",
+
+    "1F482 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿปโ€โ™‚๏ธ man guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F482 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿปโ€โ™‚ man guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2642",
+
+    "1F482 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿผโ€โ™‚๏ธ man guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F482 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿผโ€โ™‚ man guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2642",
+
+    "1F482 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฝโ€โ™‚๏ธ man guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F482 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฝโ€โ™‚ man guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2642",
+
+    "1F482 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿพโ€โ™‚๏ธ man guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F482 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿพโ€โ™‚ man guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2642",
+
+    "1F482 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฟโ€โ™‚๏ธ man guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F482 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฟโ€โ™‚ man guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2642",
+
+    "1F482 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ’‚โ€โ™€๏ธ woman guard",
+    "\uD83D\uDC82\u200D\u2640\uFE0F",
+
+    "1F482 200D 2640                            ; non-fully-qualified # ๐Ÿ’‚โ€โ™€ woman guard",
+    "\uD83D\uDC82\u200D\u2640",
+
+    "1F482 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿปโ€โ™€๏ธ woman guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F482 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿปโ€โ™€ woman guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2640",
+
+    "1F482 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿผโ€โ™€๏ธ woman guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F482 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿผโ€โ™€ woman guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2640",
+
+    "1F482 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฝโ€โ™€๏ธ woman guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F482 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฝโ€โ™€ woman guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2640",
+
+    "1F482 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿพโ€โ™€๏ธ woman guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F482 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿพโ€โ™€ woman guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2640",
+
+    "1F482 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฟโ€โ™€๏ธ woman guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F482 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฟโ€โ™€ woman guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2640",
+
+    "1F477                                      ; fully-qualified     # ๐Ÿ‘ท construction worker",
+    "\uD83D\uDC77",
+
+    "1F477 1F3FB                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿป construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB",
+
+    "1F477 1F3FC                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿผ construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC",
+
+    "1F477 1F3FD                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝ construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD",
+
+    "1F477 1F3FE                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿพ construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE",
+
+    "1F477 1F3FF                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟ construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF",
+
+    "1F477 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ‘ทโ€โ™‚๏ธ man construction worker",
+    "\uD83D\uDC77\u200D\u2642\uFE0F",
+
+    "1F477 200D 2642                            ; non-fully-qualified # ๐Ÿ‘ทโ€โ™‚ man construction worker",
+    "\uD83D\uDC77\u200D\u2642",
+
+    "1F477 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿปโ€โ™‚๏ธ man construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F477 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿปโ€โ™‚ man construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2642",
+
+    "1F477 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿผโ€โ™‚๏ธ man construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F477 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿผโ€โ™‚ man construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2642",
+
+    "1F477 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝโ€โ™‚๏ธ man construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F477 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฝโ€โ™‚ man construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2642",
+
+    "1F477 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿพโ€โ™‚๏ธ man construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F477 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿพโ€โ™‚ man construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2642",
+
+    "1F477 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟโ€โ™‚๏ธ man construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F477 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฟโ€โ™‚ man construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2642",
+
+    "1F477 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ‘ทโ€โ™€๏ธ woman construction worker",
+    "\uD83D\uDC77\u200D\u2640\uFE0F",
+
+    "1F477 200D 2640                            ; non-fully-qualified # ๐Ÿ‘ทโ€โ™€ woman construction worker",
+    "\uD83D\uDC77\u200D\u2640",
+
+    "1F477 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿปโ€โ™€๏ธ woman construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F477 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿปโ€โ™€ woman construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2640",
+
+    "1F477 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿผโ€โ™€๏ธ woman construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F477 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿผโ€โ™€ woman construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2640",
+
+    "1F477 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝโ€โ™€๏ธ woman construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F477 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฝโ€โ™€ woman construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2640",
+
+    "1F477 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿพโ€โ™€๏ธ woman construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F477 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿพโ€โ™€ woman construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2640",
+
+    "1F477 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟโ€โ™€๏ธ woman construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F477 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฟโ€โ™€ woman construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2640",
+
+    "1F934                                      ; fully-qualified     # ๐Ÿคด prince",
+    "\uD83E\uDD34",
+
+    "1F934 1F3FB                                ; fully-qualified     # ๐Ÿคด๐Ÿป prince: light skin tone",
+    "\uD83E\uDD34\uD83C\uDFFB",
+
+    "1F934 1F3FC                                ; fully-qualified     # ๐Ÿคด๐Ÿผ prince: medium-light skin tone",
+    "\uD83E\uDD34\uD83C\uDFFC",
+
+    "1F934 1F3FD                                ; fully-qualified     # ๐Ÿคด๐Ÿฝ prince: medium skin tone",
+    "\uD83E\uDD34\uD83C\uDFFD",
+
+    "1F934 1F3FE                                ; fully-qualified     # ๐Ÿคด๐Ÿพ prince: medium-dark skin tone",
+    "\uD83E\uDD34\uD83C\uDFFE",
+
+    "1F934 1F3FF                                ; fully-qualified     # ๐Ÿคด๐Ÿฟ prince: dark skin tone",
+    "\uD83E\uDD34\uD83C\uDFFF",
+
+    "1F478                                      ; fully-qualified     # ๐Ÿ‘ธ princess",
+    "\uD83D\uDC78",
+
+    "1F478 1F3FB                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿป princess: light skin tone",
+    "\uD83D\uDC78\uD83C\uDFFB",
+
+    "1F478 1F3FC                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿผ princess: medium-light skin tone",
+    "\uD83D\uDC78\uD83C\uDFFC",
+
+    "1F478 1F3FD                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿฝ princess: medium skin tone",
+    "\uD83D\uDC78\uD83C\uDFFD",
+
+    "1F478 1F3FE                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿพ princess: medium-dark skin tone",
+    "\uD83D\uDC78\uD83C\uDFFE",
+
+    "1F478 1F3FF                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿฟ princess: dark skin tone",
+    "\uD83D\uDC78\uD83C\uDFFF",
+
+    "1F473                                      ; fully-qualified     # ๐Ÿ‘ณ person wearing turban",
+    "\uD83D\uDC73",
+
+    "1F473 1F3FB                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿป person wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB",
+
+    "1F473 1F3FC                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿผ person wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC",
+
+    "1F473 1F3FD                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฝ person wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD",
+
+    "1F473 1F3FE                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿพ person wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE",
+
+    "1F473 1F3FF                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฟ person wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF",
+
+    "1F473 200D 2642 FE0F                       ; fully-qualified     # 👳‍♂️ man wearing turban",
+    "\uD83D\uDC73\u200D\u2642\uFE0F",
+
+    "1F473 200D 2642                            ; non-fully-qualified # 👳‍♂ man wearing turban",
+    "\uD83D\uDC73\u200D\u2642",
+
+    "1F473 1F3FB 200D 2642 FE0F                 ; fully-qualified     # 👳🏻‍♂️ man wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F473 1F3FB 200D 2642                      ; non-fully-qualified # 👳🏻‍♂ man wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2642",
+
+    "1F473 1F3FC 200D 2642 FE0F                 ; fully-qualified     # 👳🏼‍♂️ man wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F473 1F3FC 200D 2642                      ; non-fully-qualified # 👳🏼‍♂ man wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2642",
+
+    "1F473 1F3FD 200D 2642 FE0F                 ; fully-qualified     # 👳🏽‍♂️ man wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F473 1F3FD 200D 2642                      ; non-fully-qualified # 👳🏽‍♂ man wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2642",
+
+    "1F473 1F3FE 200D 2642 FE0F                 ; fully-qualified     # 👳🏾‍♂️ man wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F473 1F3FE 200D 2642                      ; non-fully-qualified # 👳🏾‍♂ man wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2642",
+
+    "1F473 1F3FF 200D 2642 FE0F                 ; fully-qualified     # 👳🏿‍♂️ man wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F473 1F3FF 200D 2642                      ; non-fully-qualified # 👳🏿‍♂ man wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2642",
+
+    "1F473 200D 2640 FE0F                       ; fully-qualified     # 👳‍♀️ woman wearing turban",
+    "\uD83D\uDC73\u200D\u2640\uFE0F",
+
+    "1F473 200D 2640                            ; non-fully-qualified # 👳‍♀ woman wearing turban",
+    "\uD83D\uDC73\u200D\u2640",
+
+    "1F473 1F3FB 200D 2640 FE0F                 ; fully-qualified     # 👳🏻‍♀️ woman wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F473 1F3FB 200D 2640                      ; non-fully-qualified # 👳🏻‍♀ woman wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2640",
+
+    "1F473 1F3FC 200D 2640 FE0F                 ; fully-qualified     # 👳🏼‍♀️ woman wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F473 1F3FC 200D 2640                      ; non-fully-qualified # 👳🏼‍♀ woman wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2640",
+
+    "1F473 1F3FD 200D 2640 FE0F                 ; fully-qualified     # 👳🏽‍♀️ woman wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F473 1F3FD 200D 2640                      ; non-fully-qualified # 👳🏽‍♀ woman wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2640",
+
+    "1F473 1F3FE 200D 2640 FE0F                 ; fully-qualified     # 👳🏾‍♀️ woman wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F473 1F3FE 200D 2640                      ; non-fully-qualified # 👳🏾‍♀ woman wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2640",
+
+    "1F473 1F3FF 200D 2640 FE0F                 ; fully-qualified     # 👳🏿‍♀️ woman wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F473 1F3FF 200D 2640                      ; non-fully-qualified # 👳🏿‍♀ woman wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2640",
+
+    "1F472                                      ; fully-qualified     # 👲 man with Chinese cap",
+    "\uD83D\uDC72",
+
+    "1F472 1F3FB                                ; fully-qualified     # 👲🏻 man with Chinese cap: light skin tone",
+    "\uD83D\uDC72\uD83C\uDFFB",
+
+    "1F472 1F3FC                                ; fully-qualified     # 👲🏼 man with Chinese cap: medium-light skin tone",
+    "\uD83D\uDC72\uD83C\uDFFC",
+
+    "1F472 1F3FD                                ; fully-qualified     # 👲🏽 man with Chinese cap: medium skin tone",
+    "\uD83D\uDC72\uD83C\uDFFD",
+
+    "1F472 1F3FE                                ; fully-qualified     # 👲🏾 man with Chinese cap: medium-dark skin tone",
+    "\uD83D\uDC72\uD83C\uDFFE",
+
+    "1F472 1F3FF                                ; fully-qualified     # 👲🏿 man with Chinese cap: dark skin tone",
+    "\uD83D\uDC72\uD83C\uDFFF",
+
+    "1F9D5                                      ; fully-qualified     # 🧕 woman with headscarf",
+    "\uD83E\uDDD5",
+
+    "1F9D5 1F3FB                                ; fully-qualified     # 🧕🏻 woman with headscarf: light skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFB",
+
+    "1F9D5 1F3FC                                ; fully-qualified     # 🧕🏼 woman with headscarf: medium-light skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFC",
+
+    "1F9D5 1F3FD                                ; fully-qualified     # 🧕🏽 woman with headscarf: medium skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFD",
+
+    "1F9D5 1F3FE                                ; fully-qualified     # 🧕🏾 woman with headscarf: medium-dark skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFE",
+
+    "1F9D5 1F3FF                                ; fully-qualified     # 🧕🏿 woman with headscarf: dark skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFF",
+
+    "1F9D4                                      ; fully-qualified     # 🧔 bearded person",
+    "\uD83E\uDDD4",
+
+    "1F9D4 1F3FB                                ; fully-qualified     # 🧔🏻 bearded person: light skin t

<TRUNCATED>

[06/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
index 292f2ad..e4b10af 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
@@ -37,12 +37,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %integer
 %final
 %public
@@ -52,22 +53,73 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %xstate AVOID_BAD_URL
 %buffer 255
 
-// UAX#29 WB4. X (Extend | Format)* --> X
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+
+//////////////////////////////////////////////////////////////////////////
+// Begin Emoji Macros - see documentation below, near the EMOJI_TYPE rule
+
+// TODO: Remove this include file when JFlex supports these properties directly (in Unicode 11.0+)
+%include ../../../../../../../../../core/src/data/jflex/UnicodeEmojiProperties.jflex
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+//   \uFE0E (Text Presentation Selector) and \uFE0F (Emoji Presentation Selector) - included in \p{WB:Extend}
+//   - are explicitly excluded here so that we can properly handle Emoji sequences.
+//
+ExtFmtZwjSansPresSel = [[\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]--[\uFE0E\uFE0F]]*
+
+KeyCapBaseChar = [0-9#*]
+KeyCapBaseCharEx = {KeyCapBaseChar} {ExtFmtZwjSansPresSel}
+KeyCap = \u20E3
+KeyCapEx = {KeyCap} {ExtFmtZwjSansPresSel}
+
+// # \u3030 = WAVY DASH; \u303D = PART ALTERNATION MARK
+AccidentalEmoji = [©®™\u3030\u303D]
+EmojiRKAM = ( \p{WB:Regional_Indicator} | {KeyCapBaseChar} | {AccidentalEmoji} | {Emoji_Modifier} )
+
+// Unlike Unicode properties, macros are not allowed in character classes, so we achieve set difference
+// by applying DeMorgan: the expression that matches everything of 'a' not matched by 'b' is: !(!a|b)
+// TODO: Convert this expression to character class difference when JFlex supports the properties directly (in Unicode 11.0+)
+EmojiSansRKAM = !( ! {Emoji} | {EmojiRKAM} )
+
+EmojiChar = ( {Extended_Pictographic} | {EmojiSansRKAM} )
+
+EmojiCharEx         = {EmojiChar}           {ExtFmtZwjSansPresSel}
+EmojiModifierBaseEx = {Emoji_Modifier_Base} {ExtFmtZwjSansPresSel}
+EmojiModifierEx     = {Emoji_Modifier}      {ExtFmtZwjSansPresSel}
+
+EmojiPresentationSelector = \uFE0F
+EmojiCharOrPresSeqOrModSeq = ( \p{WB:ZWJ}* {EmojiCharEx} {EmojiPresentationSelector}? ) | ( ( \p{WB:ZWJ}* {EmojiModifierBaseEx} )? {EmojiModifierEx} )
+TagSpec = [\u{E0020}-\u{E007E}]
+TagTerm = \u{E007F}
+
+// End Emoji Macros
+//////////////////////////////////////////////////////////////////////////
+
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
 //
-HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx   = [\p{WB:HebrewLetter}\p{WB:ALetter}]                       [\p{WB:Format}\p{WB:Extend}]*
-NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx          = \p{WB:Katakana}                                           [\p{WB:Format}\p{WB:Extend}]* 
-MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      [\p{WB:Format}\p{WB:Extend}]* 
-MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       [\p{WB:Format}\p{WB:Extend}]*
-HanEx               = \p{Script:Han}                                            [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx          = \p{Script:Hiragana}                                       [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx       = \p{WB:Single_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx       = \p{WB:Double_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator}                                  [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx    = \p{LB:Complex_Context}                                    [\p{WB:Format}\p{WB:Extend}]*
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] {ExtFmtZwj}
+AHLetterEx          = [\p{WB:ALetter}\p{WB:Hebrew_Letter}]                      {ExtFmtZwj}
+NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        {ExtFmtZwj}
+KatakanaEx          = \p{WB:Katakana}                                           {ExtFmtZwj} 
+MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      {ExtFmtZwj} 
+MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         {ExtFmtZwj}
+ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       {ExtFmtZwj}
+HanEx               = \p{Script:Han}                                            {ExtFmtZwj}
+HiraganaEx          = \p{Script:Hiragana}                                       {ExtFmtZwj}
+SingleQuoteEx       = \p{WB:Single_Quote}                                       {ExtFmtZwj}
+DoubleQuoteEx       = \p{WB:Double_Quote}                                       {ExtFmtZwj}
+HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      {ExtFmtZwj}
+RegionalIndicatorEx = \p{WB:Regional_Indicator}                                 {ExtFmtZwj}
+ComplexContextEx    = \p{LB:Complex_Context}                                    {ExtFmtZwj}
+
 
 // URL and E-mail syntax specifications:
 //
@@ -174,18 +226,28 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
    */
   public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
   
+  /** Ideographic token type */
   public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
   
+  /** Hiragana token type */
   public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;
   
+  /** Katakana token type */
   public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;
   
+  /** Hangul token type */
   public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;
   
+  /** Email token type */
   public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;
   
+  /** URL token type */
   public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;
 
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = UAX29URLEmailTokenizer.EMOJI;
+
+  /** Character count processed so far */
   public final int yychar()
   {
     return yychar;
@@ -213,11 +275,11 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
 
 <YYINITIAL, AVOID_BAD_URL> {
 
-// UAX#29 WB1.   sot   ÷
-//        WB2.     ÷   eot
+// UAX#29 WB1.    sot ÷ Any
+//        WB2.    Any ÷ eot
 //
   <<EOF>> { return YYEOF; }
-
+  
   {URL}   { yybegin(YYINITIAL); return URL_TYPE; }
 
   // LUCENE-5391: Don't recognize no-scheme domain-only URLs with a following alphanumeric character
@@ -244,14 +306,61 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
 
   {EMAIL} { yybegin(YYINITIAL); return EMAIL_TYPE; }
 
-  // UAX#29 WB8.   Numeric × Numeric
-  //        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-  //        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-  //        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-  //        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+
+  // Instead of these: UAX#29 WB3c. ZWJ × (Glue_After_Zwj | EBG)
+  //                          WB14. (E_Base | EBG) × E_Modifier
+  //                          WB15. ^ (RI RI)* RI × RI
+  //                          WB16. [^RI] (RI RI)* RI × RI
+  //
+  // We use the "emoji_sequence" rule from http://www.unicode.org/reports/tr51/tr51-14.html (Unicode 11.0)
+  // and the Emoji data from http://unicode.org/Public/emoji/11.0/emoji-data.txt (in included file UnicodeEmojiProperties.jflex)
+  // 
+  // emoji_sequence :=
+  //    Top-level EBNF           Expanded #1                       Expanded #2                       Expanded #3
+  //    ---------------------    ----------------------------      -----------------------------     ----------------------------------------------
+  //      emoji_core_sequence      emoji_combining_sequence          emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_keycap_sequence           | [0-9#*] \u{FE0F 20E3}      [1]
+  //                             | emoji_modifier_sequence                                           | \p{Emoji_Modifier_Base} \p{Emoji_Modifier}
+  //                             | emoji_flag_sequence                                               | \p{WB:Regional_Indicator}{2}               )
+  //
+  //    | emoji_zwj_sequence       emoji_zwj_element                 emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+  //                             ( ZWJ emoji_zwj_element )+                                          ( \p{WB:ZWJ} ^^ )+
+  // 
+  //    | emoji_tag_sequence     tag_base                            emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+  //                             tag_spec                                                            [\u{E0020}-\u{E007E}]+
+  //                             tag_term                                                            \u{E007F}
+  //
+  // [1] https://unicode.org/Public/emoji/11.0/emoji-test.txt includes key cap sequences 
+  //     WITHOUT \uFE0F (emoji presentation indicator), annotating them as "non-fully-qualified";
+  //     TR#51 says about non-fully-qualified *ZWJ sequences* that implementations may
+  //     choose whether to support them for segmentation.  This implementation will
+  //     recognize /[0-9#*]\u20E3/ - i.e. without \uFE0F - as Emoji. 
+  //
+  // See also: http://www.unicode.org/L2/L2016/16315-handling-seg-emoji.pdf
+  //           https://docs.google.com/document/d/1yDZ5TUZNVVKaM9zYCCLbRIAKGNZANsAGl0bcNzGGvn8
+  //
+  //     In particular, the above docs recommend a modified UAX#29 WB3c rule (covered by TR#51's "emoji_zwj_sequence"):
+  //
+  //         WB3c′ ZWJ × (Extended_Pictographic | EmojiNRK)
+  //
+    {EmojiCharOrPresSeqOrModSeq} ( ( \p{WB:ZWJ} {EmojiCharOrPresSeqOrModSeq} )* | {TagSpec}+ {TagTerm} ) 
+  | {KeyCapBaseCharEx} {EmojiPresentationSelector}? {KeyCapEx} 
+  | {RegionalIndicatorEx}{2} 
+    { yybegin(YYINITIAL); return EMOJI_TYPE; }
+
+  // UAX#29 WB8.    Numeric × Numeric
+  //        WB11.   Numeric (MidNum | MidNumLetQ) × Numeric
+  //        WB12.   Numeric × (MidNum | MidNumLetQ) Numeric
+  //        WB13a.  (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+  //        WB13b.  ExtendNumLet × (AHLetter | Numeric | Katakana)
   //
   {ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
-    {  yybegin(YYINITIAL); return NUMERIC_TYPE; }
+    { yybegin(YYINITIAL); return NUMERIC_TYPE; }
 
   // subset of the below for typing purposes only!
   {HangulEx}+
@@ -260,32 +369,32 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   {KatakanaEx}+
     { yybegin(YYINITIAL); return KATAKANA_TYPE; }
 
-  // UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-  //        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-  //        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-  //        WB7a.  Hebrew_Letter × Single_Quote
-  //        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
-  //        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
-  //        WB9.   (ALetter | Hebrew_Letter) × Numeric
-  //        WB10.  Numeric × (ALetter | Hebrew_Letter)
-  //        WB13.  Katakana × Katakana
-  //        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-  //        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+  // UAX#29 WB5.    AHLetter × AHLetter
+  //        WB6.    AHLetter × (MidLetter | MidNumLetQ) AHLetter
+  //        WB7.    AHLetter (MidLetter | MidNumLetQ) × AHLetter
+  //        WB7a.   Hebrew_Letter × Single_Quote
+  //        WB7b.   Hebrew_Letter × Double_Quote Hebrew_Letter
+  //        WB7c.   Hebrew_Letter Double_Quote × Hebrew_Letter
+  //        WB9.    AHLetter × Numeric
+  //        WB10.   Numeric × AHLetter
+  //        WB13.   Katakana × Katakana
+  //        WB13a.  (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+  //        WB13b.  ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
   //
-  {ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                       | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+  {ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                       | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                        )+
                      )
-  ({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                       | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+  ({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                       | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                        )+
                      )
   )*
-  {ExtendNumLetEx}*
+  {ExtendNumLetEx}* 
     { yybegin(YYINITIAL); return WORD_TYPE; }
 
 
@@ -297,7 +406,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   //    annex.  That means that satisfactory treatment of languages like Chinese
   //    or Thai requires special handling.
   //
-  // In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
+  // In Unicode 9.0, only one character has the \p{Line_Break = Contingent_Break}
   // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
   //
   // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -310,18 +419,15 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   //
   {ComplexContextEx}+ { yybegin(YYINITIAL); return SOUTH_EAST_ASIAN_TYPE; }
 
-  // UAX#29 WB14.  Any ÷ Any
+  // UAX#29 WB999.  Any ÷ Any
   //
   {HanEx} { yybegin(YYINITIAL); return IDEOGRAPHIC_TYPE; }
   {HiraganaEx} { yybegin(YYINITIAL); return HIRAGANA_TYPE; }
 
-
-  // UAX#29 WB3.   CR × LF
-  //        WB3a.  (Newline | CR | LF) ÷
-  //        WB3b.  ÷ (Newline | CR | LF)
-  //        WB13c. Regional_Indicator × Regional_Indicator
-  //        WB14.  Any ÷ Any
+  // UAX#29 WB3.    CR × LF
+  //        WB3a.   (Newline | CR | LF) ÷
+  //        WB3b.   ÷ (Newline | CR | LF)
+  //        WB999.  Any ÷ Any
   //
-  {RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
-    { yybegin(YYINITIAL); /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+  [^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */ }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
index 7f9227f..9295e1c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -341,7 +341,7 @@ class WikipediaTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -419,11 +419,11 @@ class WikipediaTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -575,28 +575,29 @@ final void reset() {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -820,199 +821,245 @@ final void reset() {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 47: break;
-        case 2: 
-          { positionInc = 1; return ALPHANUM;
-          }
-        case 48: break;
-        case 3: 
-          { positionInc = 1; return CJ;
-          }
-        case 49: break;
-        case 4: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 50: break;
-        case 5: 
-          { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 51: break;
-        case 6: 
-          { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 52: break;
-        case 7: 
-          { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 53: break;
-        case 8: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
-          }
-        case 54: break;
-        case 9: 
-          { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
-          }
-        case 55: break;
-        case 10: 
-          { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 56: break;
-        case 11: 
-          { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 57: break;
-        case 12: 
-          { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
-          }
-        case 58: break;
-        case 13: 
-          { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 59: break;
-        case 14: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
-          }
-        case 60: break;
-        case 15: 
-          { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 61: break;
-        case 16: 
-          { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 62: break;
-        case 17: 
-          { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
-          }
-        case 63: break;
-        case 18: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
-          }
-        case 64: break;
-        case 19: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
-          }
-        case 65: break;
-        case 20: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 66: break;
-        case 21: 
-          { yybegin(STRING); return currentTokType;/*pipe*/
-          }
-        case 67: break;
-        case 22: 
-          { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 68: break;
-        case 23: 
-          { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 69: break;
-        case 24: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 70: break;
-        case 25: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 71: break;
-        case 26: 
-          { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 72: break;
-        case 27: 
-          { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 73: break;
-        case 28: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 74: break;
-        case 29: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 75: break;
-        case 30: 
-          { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 76: break;
-        case 31: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
-          }
-        case 77: break;
-        case 32: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 78: break;
-        case 33: 
-          { positionInc = 1; return APOSTROPHE;
-          }
-        case 79: break;
-        case 34: 
-          { positionInc = 1; return HOST;
-          }
-        case 80: break;
-        case 35: 
-          { positionInc = 1; return NUM;
-          }
-        case 81: break;
-        case 36: 
-          { positionInc = 1; return COMPANY;
-          }
-        case 82: break;
-        case 37: 
-          { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 83: break;
-        case 38: 
-          { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
-          }
-        case 84: break;
-        case 39: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
-          }
-        case 85: break;
-        case 40: 
-          { positionInc = 1; return ACRONYM;
-          }
-        case 86: break;
-        case 41: 
-          { positionInc = 1; return EMAIL;
-          }
-        case 87: break;
-        case 42: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
-          }
-        case 88: break;
-        case 43: 
-          { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
-          }
-        case 89: break;
-        case 44: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 90: break;
-        case 45: 
-          { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 91: break;
-        case 46: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 92: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            return YYEOF;
-          } 
-          else {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+        return YYEOF;
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 47: break;
+          case 2: 
+            { positionInc = 1; return ALPHANUM;
+            } 
+            // fall through
+          case 48: break;
+          case 3: 
+            { positionInc = 1; return CJ;
+            } 
+            // fall through
+          case 49: break;
+          case 4: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 50: break;
+          case 5: 
+            { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 51: break;
+          case 6: 
+            { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 52: break;
+          case 7: 
+            { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 53: break;
+          case 8: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+            } 
+            // fall through
+          case 54: break;
+          case 9: 
+            { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+            } 
+            // fall through
+          case 55: break;
+          case 10: 
+            { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 56: break;
+          case 11: 
+            { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 57: break;
+          case 12: 
+            { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
+            } 
+            // fall through
+          case 58: break;
+          case 13: 
+            { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 59: break;
+          case 14: 
+            { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 60: break;
+          case 15: 
+            { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 61: break;
+          case 16: 
+            { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 62: break;
+          case 17: 
+            { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
+            } 
+            // fall through
+          case 63: break;
+          case 18: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
+            } 
+            // fall through
+          case 64: break;
+          case 19: 
+            { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
+            } 
+            // fall through
+          case 65: break;
+          case 20: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 66: break;
+          case 21: 
+            { yybegin(STRING); return currentTokType;/*pipe*/
+            } 
+            // fall through
+          case 67: break;
+          case 22: 
+            { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 68: break;
+          case 23: 
+            { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 69: break;
+          case 24: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 70: break;
+          case 25: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 71: break;
+          case 26: 
+            { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 72: break;
+          case 27: 
+            { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 73: break;
+          case 28: 
+            { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 74: break;
+          case 29: 
+            { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 75: break;
+          case 30: 
+            { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 76: break;
+          case 31: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
+            } 
+            // fall through
+          case 77: break;
+          case 32: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 78: break;
+          case 33: 
+            { positionInc = 1; return APOSTROPHE;
+            } 
+            // fall through
+          case 79: break;
+          case 34: 
+            { positionInc = 1; return HOST;
+            } 
+            // fall through
+          case 80: break;
+          case 35: 
+            { positionInc = 1; return NUM;
+            } 
+            // fall through
+          case 81: break;
+          case 36: 
+            { positionInc = 1; return COMPANY;
+            } 
+            // fall through
+          case 82: break;
+          case 37: 
+            { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 83: break;
+          case 38: 
+            { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
+            } 
+            // fall through
+          case 84: break;
+          case 39: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
+            } 
+            // fall through
+          case 85: break;
+          case 40: 
+            { positionInc = 1; return ACRONYM;
+            } 
+            // fall through
+          case 86: break;
+          case 41: 
+            { positionInc = 1; return EMAIL;
+            } 
+            // fall through
+          case 87: break;
+          case 42: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
+            } 
+            // fall through
+          case 88: break;
+          case 43: 
+            { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+            } 
+            // fall through
+          case 89: break;
+          case 44: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 90: break;
+          case 45: 
+            { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 91: break;
+          case 46: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 92: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
index cf6c65a..758d5d2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
@@ -499,7 +499,7 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
 
     String randomHtmlishString2 // Don't create a comment (disallow "<!--") and don't include a closing ">"
         = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--","__");
-    String unclosedAngleBangNonCDATA = "<!" + randomHtmlishString1 +"-[CDATA[";
+    String unclosedAngleBangNonCDATA = "<!" + randomHtmlishString2 +"-[CDATA[";
 
     String[] testGold = {
         "one<![CDATA[<one><two>three<four></four></two></one>]]>two",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
index b3b0ce1..507eb09 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
@@ -361,14 +361,14 @@ public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase {
 
     StringBuilder bToken = new StringBuilder();
     // exact max length:
-    for(int i=0;i<StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;i++) {
+    for(int i=0;i<UAX29URLEmailAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;i++) {
       bToken.append('b');
     }
 
     String bString = bToken.toString();
     // first bString is exact max default length; next one is 1 too long
     String input = "x " + bString + " " + bString + "b";
-    assertAnalyzesTo(a, input.toString(), new String[] {"x", bString, bString, "b"});
+    assertAnalyzesTo(a, input, new String[] {"x", bString, bString, "b"});
     a.close();
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
index d9d8381..76c5d55 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
@@ -467,7 +467,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
+    WordBreakTestUnicode_9_0_0 wordBreakTest = new WordBreakTestUnicode_9_0_0();
     wordBreakTest.test(a);
   }
   
@@ -545,6 +545,80 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
 
+  /** simple emoji: each U+1F4A9 is its own token, with or without whitespace between them */
+  public void testEmoji() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩 💩💩",
+        new String[] { "💩", "💩", "💩" },
+        new String[] { "<EMOJI>", "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence: the whole U+200D-joined sequence is expected as a single token */
+  public void testEmojiSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👩‍❤️‍👩",
+        new String[] { "👩‍❤️‍👩" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence with fitzpatrick modifier: still expected as a single token */
+  public void testEmojiSequenceWithModifier() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👨🏼‍⚕️",
+        new String[] { "👨🏼‍⚕️" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** regional indicator: four indicators in a row are expected as two pair tokens */
+  public void testEmojiRegionalIndicator() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🇺🇸🇺🇸",
+        new String[] { "🇺🇸", "🇺🇸" },
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** variation sequence: keycap sequences with U+FE0F, then text presentation (U+FE0E) cases */
+  public void testEmojiVariationSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#️⃣",
+        new String[] { "#️⃣" },
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3️⃣",
+        new String[] { "3️⃣",},
+        new String[] { "<EMOJI>" });
+
+    // text presentation sequences
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",          // no token is expected here
+        new String[] { },
+        new String[] { });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
+        new String[] { "3\uFE0E",},
+        new String[] { "<NUM>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
+        new String[] { "\u2B55",},
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
+        new String[] { "\u2B55", "\u200D\u2B55"},
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** emoji tag sequence: U+1F3F4 followed by tag characters is expected as a single token */
+  public void testEmojiTagSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🏴󠁧󠁢󠁥󠁮󠁧󠁿",
+        new String[] { "🏴󠁧󠁢󠁥󠁮󠁧󠁿" }, new String[] { "<EMOJI>" });
+  }
+
+  /** emoji next to latin or ideographic text are expected as separate tokens */
+  public void testEmojiTokenization() throws Exception {
+    // simple emoji around latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "poo💩poo",
+        new String[] { "poo", "💩", "poo" },
+        new String[] { "<ALPHANUM>", "<EMOJI>", "<ALPHANUM>" });
+    // simple emoji around non-latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩中國💩",
+        new String[] { "💩", "中", "國", "💩" }, new String[] { "<EMOJI>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<EMOJI>" });
+  }
+
+  public void testUnicodeEmojiTests() throws Exception {
+    // Run the Unicode 11.0 emoji tokenization test class against this analyzer.
+    new EmojiTokenizationTestUnicode_11_0().test(a);
+  }
+
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index 789fc5f..0dc3884 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -2388,7 +2388,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
 
   <!-- JFlex task -->
   <target name="-install-jflex" unless="jflex.loaded" depends="ivy-availability-check,ivy-configure">
-    <ivy:cachepath organisation="de.jflex" module="jflex" revision="1.6.0"
+    <ivy:cachepath organisation="de.jflex" module="jflex" revision="1.7.0"
                    inline="true" conf="default" transitive="true" pathid="jflex.classpath"/>
     <taskdef name="jflex" classname="jflex.anttask.JFlexTask" classpathref="jflex.classpath"/>
     <property name="jflex.loaded" value="true"/>
@@ -2645,7 +2645,11 @@ The following arguments can be provided to ant to alter its behaviour and target
     <attribute name="dir"/>
     <attribute name="name"/>
     <sequential>
-      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+      <!-- The default skeleton is specified here to work around a JFlex ant task bug:    -->
+      <!-- invocations with a non-default skeleton will cause following invocations to    -->
+      <!-- use the same skeleton, though not specified, unless the default is configured. -->
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on"
+             skeleton="${common.dir}/core/src/data/jflex/skeleton.default"/>
     </sequential>
   </macrodef>
 
@@ -2653,20 +2657,13 @@ The following arguments can be provided to ant to alter its behaviour and target
     <attribute name="dir"/>
     <attribute name="name"/>
     <sequential>
-      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
       <!-- LUCENE-5897: Disallow scanner buffer expansion -->
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
-                     replace="" flags="s" />
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on"
+             skeleton="${common.dir}/core/src/data/jflex/skeleton.disable.buffer.expansion.txt"/>
+      <!-- Since the ZZ_BUFFERSIZE declaration is generated rather than in the skeleton, we have to transform it here. -->
       <replaceregexp file="@{dir}/@{name}.java"
                      match="private static final int ZZ_BUFFERSIZE ="
                      replace="private int ZZ_BUFFERSIZE ="/>
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="int requested = zzBuffer.length - zzEndRead;"
-                     replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="(zzFinalHighSurrogate = 1;)(\r?\n)"
-                     replace="\1\2          if (totalRead == 1) { return true; }\2"/>
     </sequential>
   </macrodef>
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex b/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
new file mode 100644
index 0000000..c631dee
--- /dev/null
+++ b/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file was automatically generated by getUnicodeEmojiProperties.pl
+// from: http://unicode.org/Public/emoji/11.0/emoji-data.txt 
+
+Emoji = [\u{23}\u{2A}\u{30}-\u{39}\u{A9}\u{AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2604}\u{260E}\u{2611}\u{2614}-\u{2615}\u{2618}\u{261D}\u{2620}\u{2622}-\u{2623}\u{2626}\u{262A}\u{262E}-\u{262F}\u{2638}-\u{263A}\u{2640}\u{2642}\u{2648}-\u{2653}\u{265F}-\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267E}-\u{267F}\u{2692}-\u{2697}\u{2699}\u{269B}-\u{269C}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26B0}-\u{26B1}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26C8}\u{26CE}-\u{26CF}\u{26D1}\u{26D3}-\u{26D4}\u{26E9}-\u{26EA}\u{26F0}-\u{26F5}\u{26F7}-\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270D}\u{270F}\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{
 2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E6}-\u{1F1FF}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}-\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F7}-\u{1F4FD}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F56F}-\u{1F570}\u{1F573}-\u{1F57A}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}-\u{1F596}\u{1F5A4}-\u{1F5A5}\u{1F5A8}\u{1F5B1}-\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5E8}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D2}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6EC}\u{1F6F0}\u{1F6F3}-\u{1F6F9}\u{1F910}-\u{1F93A}\u{1F93C}-\u{1F93E}\u{1F940}-\u{1F945}\u{1F947}-\u{1F970}\u{1F973}-\u{1F976}\u{1F97A}\u{1F97C}-\u{1F9A2}\u{1F9B0}-\u{1F9B9}\u{1F9C0}-\u{1F9C2}\u{1F9D0}-\u{1F9FF}]
+Emoji_Modifier = [\u{1F3FB}-\u{1F3FF}]
+Emoji_Modifier_Base = [\u{261D}\u{26F9}\u{270A}-\u{270D}\u{1F385}\u{1F3C2}-\u{1F3C4}\u{1F3C7}\u{1F3CA}-\u{1F3CC}\u{1F442}-\u{1F443}\u{1F446}-\u{1F450}\u{1F466}-\u{1F469}\u{1F46E}\u{1F470}-\u{1F478}\u{1F47C}\u{1F481}-\u{1F483}\u{1F485}-\u{1F487}\u{1F4AA}\u{1F574}-\u{1F575}\u{1F57A}\u{1F590}\u{1F595}-\u{1F596}\u{1F645}-\u{1F647}\u{1F64B}-\u{1F64F}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F6C0}\u{1F6CC}\u{1F918}-\u{1F91C}\u{1F91E}-\u{1F91F}\u{1F926}\u{1F930}-\u{1F939}\u{1F93D}-\u{1F93E}\u{1F9B5}-\u{1F9B6}\u{1F9B8}-\u{1F9B9}\u{1F9D1}-\u{1F9DD}]
+Extended_Pictographic = [\u{A9}\u{AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{2388}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2605}\u{2607}-\u{2612}\u{2614}-\u{2685}\u{2690}-\u{2705}\u{2708}-\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2767}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F000}-\u{1F0FF}\u{1F10D}-\u{1F10F}\u{1F12F}\u{1F16C}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1AD}-\u{1F1E5}\u{1F201}-\u{1F20F}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F23C}-\u{1F23F}\u{1F249}-\u{1F3FA}\u{1F400}-\u{1F53D}\u{1F546}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{1F774}-\u{1F77F}\u{1F7D5}-\u{1F7FF}\u{1F80C}-\u{1F80F}\u{1F848}-\u{1F84F}\u{1F85A}-\u{1F85F}\u{1F888}-\u{1F88F}\u{1F8AE
 }-\u{1F8FF}\u{1F90C}-\u{1F93A}\u{1F93C}-\u{1F945}\u{1F947}-\u{1FFFD}]
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl b/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
new file mode 100644
index 0000000..e818b64
--- /dev/null
+++ b/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
@@ -0,0 +1,168 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+# The 'version' option is mandatory and must be exactly X.Y, since it becomes part of the URL.
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\A\d+\.\d+\z/) {
+    print STDERR "Usage: $script_name -v <version>\n";
+    print STDERR "\tversion must be of the form X.Y, e.g. 9.0\n"
+        if ($version);
+    exit 1;
+}
+my $emoji_data_url = "http://unicode.org/Public/emoji/$version/emoji-data.txt";
+my $output_filename = "UnicodeEmojiProperties.jflex";
+my $header =<<"__HEADER__";
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file was automatically generated by ${script_name}
+// from: ${emoji_data_url} 
+
+__HEADER__
+# Ranges are collected per property; only the properties named below are kept.
+my $property_ranges = {};
+my $wanted_properties = { 'Emoji' => 1, 'Emoji_Modifier' => 1, 'Emoji_Modifier_Base' => 1, 'Extended_Pictographic' => 1 };
+# Download and parse the data file, filling $property_ranges in place.
+parse_emoji_data_file($emoji_data_url, $property_ranges, $wanted_properties);
+# The include file is written into the directory this script lives in.
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+output_jflex_include_file($output_path, $property_ranges);
+
+
+# sub parse_emoji_data_file
+#
+# Downloads and parses the emoji-data.txt file, collecting the code point
+# ranges of each wanted property.  A range whose start is at most one past the
+# end of the previously stored range for the same property is merged into that
+# range instead of being stored separately.  No filtering by age is performed.
+#
+# Parameters:
+#
+#  - Emoji data file URL
+#  - Reference to hash of properties mapped to an array of alternating (start,end) code point ranges
+#  - Reference to hash of wanted property names
+#
+# Returns nothing useful; results are stored in the passed-in range hash.
+#
+sub parse_emoji_data_file {
+    my $url = shift;
+    my $prop_ranges = shift;
+    my $wanted_props = shift;
+    my $content = get_URL_content($url);
+    print STDERR "Parsing '$url'...";
+
+    for my $line (split /\r?\n/, $content) {
+        ## 231A..231B    ; Emoji_Presentation   #  1.1  [2] (⌚..⌛)    watch..hourglass done
+        ## 1F9C0         ; Emoji_Presentation   #  8.0  [1] (🧀)       cheese wedge
+        ## 1FA00..1FA5F  ; Extended_Pictographic#   NA [96] (🨀️..🩟️)    <reserved-1FA00>..<reserved-1FA5F>
+        # Code points may have 4 to 6 hex digits (Unicode goes up to 10FFFF)
+        my ($start, $end, $prop)
+            = $line =~ /^([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?\s*;\s*([^\s#]+)/;
+        next unless defined($prop);                   # Skip lines that carry no data
+        next unless defined($wanted_props->{$prop});  # Skip unless we want ranges for this property
+
+        $end = $start unless defined($end);           # A single code point is a one-element range
+        my $start_dec = hex $start;
+        my $end_dec = hex $end;
+        my $ranges = ($prop_ranges->{$prop} ||= []);
+        if (scalar(@$ranges) == 0 || $start_dec > $ranges->[-1] + 1) {
+            # Can't merge range with previous range: store it as a new one
+            push @$ranges, $start_dec, $end_dec;
+        } elsif ($end_dec > $ranges->[-1]) {
+            # Extend the previous range; never shrink it if the new range is already covered
+            $ranges->[-1] = $end_dec;
+        }
+    }
+
+    print STDERR "done.\n";
+}
+
+# sub get_URL_content
+#
+# Fetches the given URL with an HTTP GET and returns the response content.
+# Progress is reported on STDERR; on an unsuccessful response the status
+# line is printed and the script exits with status 1.
+#
+# Parameter:
+#
+#  - URL to get content for
+#
+sub get_URL_content {
+    my ($url) = @_;
+    print STDERR "Retrieving '$url'...";
+    my $response = LWP::UserAgent->new->request(HTTP::Request->new(GET => $url));
+    if (not $response->is_success) {
+        print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
+        exit 1;
+    }
+    print STDERR "done.\n";
+    return $response->content;
+}
+
+
+# sub output_jflex_include_file
+#
+# Writes the file header followed by one JFlex character class definition
+# per property (sorted by name) to the given path; dies if the file cannot
+# be opened or closed.
+#
+# Parameters:
+#
+#  - Output path
+#  - Reference to hash mapping properties to an array of alternating (start,end) codepoint ranges
+#
+sub output_jflex_include_file {
+    my ($path, $prop_ranges) = @_;
+    # Three-arg open with low-precedence 'or', so that a failed open really dies
+    open(my $out, '>', $path) or die "Error opening '$path' for writing: $!";
+    print STDERR "Writing '$path'...";
+    print $out $header;
+
+    for my $prop (sort keys %$prop_ranges) {
+        my $ranges = $prop_ranges->{$prop};
+        print $out "$prop = [";
+        for (my $index = 0 ; $index < scalar(@$ranges) ; $index += 2) {
+            my ($first, $last) = @{$ranges}[$index, $index + 1];
+            printf $out "\\u{%X}", $first;
+            printf $out "-\\u{%X}", $last if ($last > $first);  # single code points get no "-end" part
+        }
+        print $out "]\n";
+    }
+    print $out "\n";
+    close($out) or die "Error closing '$path': $!";  # buffered write errors surface at close
+    print STDERR "done.\n";
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/core/src/data/jflex/skeleton.default
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/skeleton.default b/lucene/core/src/data/jflex/skeleton.default
new file mode 100644
index 0000000..9e08fbb
--- /dev/null
+++ b/lucene/core/src/data/jflex/skeleton.default
@@ -0,0 +1,342 @@
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+--- private static final int ZZ_BUFFERSIZE = ...;
+
+  /** lexical states */
+---  lexical states, charmap
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unknown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+--- isFinal list
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true iff the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+  
+  /** 
+   * The number of occupied positions in zzBuffer beyond zzEndRead.
+   * When a lead/high surrogate has been read from the input stream
+   * into the final zzBuffer position, this will have a value of 1;
+   * otherwise, it will have a value of 0.
+   */
+  private int zzFinalHighSurrogate = 0;
+
+--- user class code
+
+--- constructor declaration
+
+
+  /**
+   * Refills the input buffer.
+   *
+   * Unread input is first moved to the front of <code>zzBuffer</code>; if the
+   * current scan position has still reached the end of the buffer, the buffer
+   * is doubled in size.  More characters are then read from
+   * <code>zzReader</code> into the free space after <code>zzEndRead</code>.
+   *
+   * @return      <code>false</code>, iff there was new input;
+   *              <code>true</code> when the reader signalled end of stream.
+   *
+   * @exception   java.io.IOException  if any I/O-Error occurs, or if the
+   *                                   reader returns 0 characters
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can) */
+    if (zzStartRead > 0) {
+      /* re-include a held-back high surrogate so that it is moved as well */
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+    /* is the buffer big enough? */
+    if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) {
+      /* if not: blow it up (double the length, keep the old content) */
+      char newBuffer[] = new char[zzBuffer.length*2];
+      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+      zzBuffer = newBuffer;
+      /* there is room now, so a held-back high surrogate counts as read */
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+    }
+
+    /* fill the buffer with new input */
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
+        if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+          /* hold the lone high surrogate back: it stays in the buffer just
+             past zzEndRead and is tracked by zzFinalHighSurrogate */
+          --zzEndRead;
+          zzFinalHighSurrogate = 1;
+        }
+      }
+      /* potentially more input available */
+      return false;
+    }
+
+    /* numRead < 0 ==> end of stream */
+    return true;
+  }
+
+    
+  /**
+   * Closes the input stream.
+   *
+   * The scanner is marked as being at end of file and the buffered input is
+   * invalidated before the reader (if one is set) is closed.
+   *
+   * @exception   java.io.IOException  if closing the reader fails
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    /* nothing to close if no reader is set */
+    if (zzReader != null)
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>YYINITIAL</tt>.
+   *
+   * Internal scan buffer is resized down to its initial length, if it has grown.
+   *
+   * @param reader   the new input stream
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    /* empty buffer: all positions point at index 0 */
+    zzEndRead = zzStartRead = 0;
+    zzCurrentPos = zzMarkedPos = 0;
+    zzFinalHighSurrogate = 0;
+    yyline = yychar = yycolumn = 0;
+    zzLexicalState = YYINITIAL;
+    /* replace a grown buffer by a fresh one of the initial size */
+    if (zzBuffer.length > ZZ_BUFFERSIZE)
+      zzBuffer = new char[ZZ_BUFFERSIZE];
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   *
+   * @return the value of <code>zzLexicalState</code>, as last set by
+   *         {@link #yybegin(int)} or {@link #yyreset(java.io.Reader)}
+   *         (initially <code>YYINITIAL</code>)
+   */
+  public final int yystate() {
+    return zzLexicalState;
+  }
+
+
+  /**
+   * Enters a new lexical state.
+   *
+   * The value is stored as given; this method performs no validation.
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   *
+   * @return a new String copied from the scan buffer, covering the chars
+   *         from <code>zzStartRead</code> up to (not including)
+   *         <code>zzMarkedPos</code>
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the
+   * matched text.
+   *
+   * It is equivalent to yytext().charAt(pos), but faster, because the
+   * char is read directly from the scan buffer and no String is created.
+   * The position is not checked against yylength() here.
+   *
+   * @param pos the position of the character to fetch.
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   *
+   * @return the number of <code>char</code> values between
+   *         <code>zzStartRead</code> and <code>zzMarkedPos</code>
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a wellformed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+--- zzScanError declaration
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+--- throws clause
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+--- yypushback decl (contains zzScanError exception)
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+--- zzDoEOF
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+--- yylex declaration
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+--- local declarations
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+--- start admin (line, char, col count)
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+--- start admin (lexstate etc)
+
+      zzForAction: {
+        while (true) {
+    
+--- next input, line, col, char count, next transition, isFinal action
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+--- line count update
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+--- char count update
+
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+--- eofvalue
+      }
+      else {
+--- actions
+          default:
+--- no match
+        }
+      }
+    }
+  }
+
+--- main
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
new file mode 100644
index 0000000..a9dabcf
--- /dev/null
+++ b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
@@ -0,0 +1,348 @@
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+--- private static final int ZZ_BUFFERSIZE = ...;
+
+  /** lexical states */
+---  lexical states, charmap
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unknown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+--- isFinal list
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true iff the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+  
+  /** 
+   * The number of occupied positions in zzBuffer beyond zzEndRead.
+   * When a lead/high surrogate has been read from the input stream
+   * into the final zzBuffer position, this will have a value of 1;
+   * otherwise, it will have a value of 0.
+   */
+  private int zzFinalHighSurrogate = 0;
+
+--- user class code
+
+--- constructor declaration
+
+/* -------------------------------------------------------------------------------- */
+/* Begin Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+
+  /**
+   * Refills the input buffer.
+   *
+   * Unread input is first moved to the front of <code>zzBuffer</code>, then
+   * more characters are read from <code>zzReader</code> into the free space.
+   * In this variant the buffer is never enlarged: when no free space is
+   * left, the method reports that there is no new input.
+   *
+   * @return      <code>false</code>, iff there was new input.
+   *
+   * @exception   java.io.IOException  if any I/O-Error occurs, or if the
+   *                                   reader returns 0 characters
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can) */
+    if (zzStartRead > 0) {
+      /* re-include a held-back high surrogate so that it is moved as well */
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+
+    /* fill the buffer with new input */
+    /* free space, not counting the slot of a held-back high surrogate */
+    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
+    if (requested == 0) {
+      /* buffer is full and must not grow: report "no new input" */
+      return true;
+    }
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+        if (numRead == requested) { // We might have requested too few chars to encode a full Unicode character.
+          /* hold the lone high surrogate back: it stays in the buffer just
+             past zzEndRead and is tracked by zzFinalHighSurrogate */
+          --zzEndRead;
+          zzFinalHighSurrogate = 1;
+          if (numRead == 1) {
+            /* the only char read was the held-back surrogate, so zzEndRead
+               did not advance: no new input */
+            return true;
+          }
+        } else {                    // There is room in the buffer for at least one more char
+          int c = zzReader.read();  // Expecting to read a low surrogate char
+          if (c == -1) {
+            /* NOTE(review): true is returned here although zzEndRead was
+               already advanced by numRead above - confirm that the caller
+               still consumes those chars */
+            return true;
+          } else {
+            zzBuffer[zzEndRead++] = (char)c;
+            return false;
+          }
+        }
+      }
+      /* potentially more input available */
+      return false;
+    }
+
+    /* numRead < 0 ==> end of stream */
+    return true;
+  }
+
+/* End Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+/* ------------------------------------------------------------------------------ */
+    
+  /**
+   * Closes the input stream.
+   *
+   * The scanner is marked as being at end of file and the buffered input is
+   * invalidated before the reader (if one is set) is closed.
+   *
+   * @exception   java.io.IOException  if closing the reader fails
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    /* nothing to close if no reader is set */
+    if (zzReader != null)
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>YYINITIAL</tt>.
+   *
+   * Internal scan buffer is resized down to its initial length, if it has grown.
+   *
+   * @param reader   the new input stream
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    /* empty buffer: all positions point at index 0 */
+    zzEndRead = zzStartRead = 0;
+    zzCurrentPos = zzMarkedPos = 0;
+    zzFinalHighSurrogate = 0;
+    yyline = yychar = yycolumn = 0;
+    zzLexicalState = YYINITIAL;
+    /* replace a grown buffer by a fresh one of the initial size */
+    if (zzBuffer.length > ZZ_BUFFERSIZE)
+      zzBuffer = new char[ZZ_BUFFERSIZE];
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   *
+   * @return the value of <code>zzLexicalState</code>, as last set by
+   *         {@link #yybegin(int)} or {@link #yyreset(java.io.Reader)}
+   *         (initially <code>YYINITIAL</code>)
+   */
+  public final int yystate() {
+    return zzLexicalState;
+  }
+
+
+  /**
+   * Enters a new lexical state.
+   *
+   * The value is stored as given; this method performs no validation.
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   *
+   * @return a new String copied from the scan buffer, covering the chars
+   *         from <code>zzStartRead</code> up to (not including)
+   *         <code>zzMarkedPos</code>
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the
+   * matched text.
+   *
+   * It is equivalent to yytext().charAt(pos), but faster, because the
+   * char is read directly from the scan buffer and no String is created.
+   * The position is not checked against yylength() here.
+   *
+   * @param pos the position of the character to fetch.
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   *
+   * @return the number of <code>char</code> values between
+   *         <code>zzStartRead</code> and <code>zzMarkedPos</code>
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a wellformed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+--- zzScanError declaration
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+--- throws clause
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+--- yypushback decl (contains zzScanError exception)
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+--- zzDoEOF
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+--- yylex declaration
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+--- local declarations
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+--- start admin (line, char, col count)
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+--- start admin (lexstate etc)
+
+      zzForAction: {
+        while (true) {
+    
+--- next input, line, col, char count, next transition, isFinal action
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+--- line count update
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+--- char count update
+
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+--- eofvalue
+      }
+      else {
+--- actions
+          default:
+--- no match
+        }
+      }
+    }
+  }
+
+--- main
+
+}


[02/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
new file mode 100644
index 0000000..027a1b5
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_9_0_0.java
@@ -0,0 +1,8276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
+ * from: http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakTest.txt
+ *
+ * WordBreakTest.txt indicates the points in the provided character sequences
+ * at which conforming implementations must and must not break words.  This
+ * class tests for expected token extraction from each of the test sequences
+ * in WordBreakTest.txt, where the expected tokens are those character
+ * sequences bounded by word breaks and containing at least one character
+ * from one of the following character sets:
+ *
+ *    \p{Script = Han}                (From http://www.unicode.org/Public/9.0.0/ucd/Scripts.txt)
+ *    \p{Script = Hiragana}
+ *    \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/9.0.0/ucd/LineBreak.txt)
+ *    \p{WordBreak = ALetter}         (From http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakProperty.txt)
+ *    \p{WordBreak = Hebrew_Letter}
+ *    \p{WordBreak = Katakana}
+ *    \p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
+ *    [\uFF10-\uFF19]                 (Full-width Arabic digits)
+ */
+@Ignore
+public class WordBreakTestUnicode_9_0_0 extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+    // รท 0001 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0001",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
+                     new String[] {  });
+
+    // รท 0001 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\r",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\r",
+                     new String[] {  });
+
+    // รท 0001 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\n",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\n",
+                     new String[] {  });
+
+    // รท 0001 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u000B",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
+                     new String[] {  });
+
+    // รท 0001 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 0001 ร— 0308 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // รท 0001 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 0001 ร— 0308 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // รท 0001 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u003A",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
+                     new String[] {  });
+
+    // รท 0001 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u002C",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
+                     new String[] {  });
+
+    // รท 0001 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u002E",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
+                     new String[] {  });
+
+    // รท 0001 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 0001 ร— 0308 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // รท 0001 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u005F",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
+                     new String[] {  });
+
+    // รท 0001 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // รท 0001 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 0001 ร— 0308 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // รท 0001 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\"",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\"",
+                     new String[] {  });
+
+    // รท 0001 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0027",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
+                     new String[] {  });
+
+    // รท 0001 รท 261D รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 0001 ร— 0308 รท 261D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // รท 0001 รท 1F3FB รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 0001 ร— 0308 รท 1F3FB รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // รท 0001 รท 2764 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 0001 ร— 0308 รท 2764 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // รท 0001 รท 1F466 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 0001 ร— 0308 รท 1F466 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // รท 0001 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u00AD",
+                     new String[] {  });
+
+    // รท 0001 ร— 0308 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 0001 × 0300 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0300",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 × 0300 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 0001 × 200D ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u200D",
+                     new String[] {  });
+
+    // ÷ 0001 × 0308 × 200D ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 0001 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 0001 × 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 0001 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 × 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 × 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 0001 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 0001 × 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 0001 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 0001 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 0001 × 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 0001 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 0001 × 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0001 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0001",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0001",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 000D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\r",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\r",
+                     new String[] {  });
+
+    // ÷ 000D × 000A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\n",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\n",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 000B ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u000B",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u000B",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 3031 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000D ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000D ÷ 0041 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000D ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000D ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u003A",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u003A",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u002C",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u002C",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 002E ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u002E",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u002E",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0030 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000D ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000D ÷ 005F ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u005F",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u005F",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 1F1E6 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000D ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000D ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\"",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\"",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0027",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0027",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 261D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000D ÷ 0308 ÷ 261D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000D ÷ 1F3FB ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000D ÷ 0308 ÷ 1F3FB ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000D ÷ 2764 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000D ÷ 0308 ÷ 2764 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000D ÷ 1F466 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000D ÷ 0308 ÷ 1F466 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000D ÷ 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u00AD",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 × 00AD ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0300",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 × 0300 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 200D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u200D",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0308 × 200D ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000D ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000D ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000D ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0001 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0001",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0001",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 000D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\r",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\r",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 000A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\n",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\n",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 000B ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u000B",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u000B",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 3031 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000A ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000A ÷ 0041 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000A ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000A ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u003A",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u003A",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u002C",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u002C",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u002E",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u002E",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000A ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000A ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u005F",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u005F",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000A ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000A ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\"",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\"",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0027",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0027",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 261D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000A ÷ 0308 ÷ 261D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000A ÷ 1F3FB ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000A ÷ 0308 ÷ 1F3FB ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000A ÷ 2764 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000A ÷ 0308 ÷ 2764 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000A ÷ 1F466 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000A ÷ 0308 ÷ 1F466 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000A ÷ 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u00AD",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0300",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 200D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u200D",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0308 × 200D ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000A ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000A ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000A ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0001",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\r",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\r",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\n",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\n",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u000B",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000B ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
+                     new String[] { "\u3031" });
+
+    // ÷ 000B ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000B ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
+                     new String[] { "\u0041" });
+
+    // ÷ 000B ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u003A",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u002C",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u002E",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000B ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
+                     new String[] { "\u0030" });
+
+    // ÷ 000B ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u005F",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000B ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
+                     new String[] { "\u05D0" });
+
+    // ÷ 000B ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\"",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\"",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0027",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 261D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000B ÷ 0308 ÷ 261D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u261D",
+                     new String[] { "\u261D" });
+
+    // ÷ 000B ÷ 1F3FB ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000B ÷ 0308 ÷ 1F3FB ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDFFB",
+                     new String[] { "\uD83C\uDFFB" });
+
+    // ÷ 000B ÷ 2764 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000B ÷ 0308 ÷ 2764 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u2764",
+                     new String[] { "\u2764" });
+
+    // ÷ 000B ÷ 1F466 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000B ÷ 0308 ÷ 1F466 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83D\uDC66",
+                     new String[] { "\uD83D\uDC66" });
+
+    // ÷ 000B ÷ 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u00AD",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0300",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 200D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u200D",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0308 × 200D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u200D",
+                     new String[] {  });
+
+    // ÷ 000B ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
+                     new String[] { "\u0061\u2060" });
+
+    // ÷ 000B ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
+                     new String[] { "\u0061" });
+
+    // ÷ 000B ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
+                     new String[] { "\u0031" });
+
+    // ÷ 3031 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0001",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\r",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\r",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\n",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\n",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u000B",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u3031",
+                     new String[] { "\u3031\u3031" });
+
+    // ÷ 3031 × 0308 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
+                     new String[] { "\u3031\u0308\u3031" });
+
+    // ÷ 3031 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0041",
+                     new String[] { "\u3031", "\u0041" });
+
+    // ÷ 3031 × 0308 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
+                     new String[] { "\u3031\u0308", "\u0041" });
+
+    // ÷ 3031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u003A",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u002C",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u002E",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0030",
+                     new String[] { "\u3031", "\u0030" });
+
+    // ÷ 3031 × 0308 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
+                     new String[] { "\u3031\u0308", "\u0030" });
+
+    // ÷ 3031 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u005F",
+                     new String[] { "\u3031\u005F" });
+
+    // ÷ 3031 × 0308 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
+                     new String[] { "\u3031\u0308\u005F" });
+
+    // ÷ 3031 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u05D0",
+                     new String[] { "\u3031", "\u05D0" });
+
+    // ÷ 3031 × 0308 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
+                     new String[] { "\u3031\u0308", "\u05D0" });
+
+    // ÷ 3031 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\"",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\"",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0027",
+                     new String[] { "\u3031" });
+
+    // ÷ 3031 × 0308 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
+                     new String[] { "\u3031\u0308" });
+
+    // ÷ 3031 ÷ 261D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u261D",
+                     new String[] { "\u3031", "\u261D" });
+
+    // ÷ 3031 × 0308 ÷ 261D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u261D",
+                     new String[] { "\u3031\u0308", "\u261D" });
+
+    // ÷ 3031 ÷ 1F3FB ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDFFB",
+                     new String[] { "\u3031", "\uD83C\uDFFB" });
+
+    // ÷ 3031 × 0308 ÷ 1F3FB ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDFFB",
+                     new String[] { "\u3031\u0308", "\uD83C\uDFFB" });
+
+    // ÷ 3031 ÷ 2764 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u2764",
+                     new String[] { "\u3031", "\u2764" });
+
+    // ÷ 3031 × 0308 ÷ 2764 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEAVY BLACK HEART (Glue_After_Zwj) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u2764",
+                     new String[] { "\u3031\u0308", "\u2764" });
+
+    // ÷ 3031 ÷ 1F466 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\uD83D\uDC66",
+                     new String[] { "\u3031", "\uD83D\uDC66" });
+
+    // ÷ 3031 × 0308 ÷ 1F466 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] BOY (EBG) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83D\uDC66",
+                     new String[] { "\u3031\u0308", "\uD83D\uDC66" });
+
+    // ÷ 3031 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u00AD",
+                     new String[] { "\u3031\u00AD" });
+
+    // ÷ 3031 × 0308 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
+                     new String[] { "\u3031\u0308\u00AD" });
+
+    // ÷ 3031 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0300",
+                     new String[] { "\u3031\u0300" });
+
+    // ÷ 3031 × 0308 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
+                     new String[] { "\u3031\u0308\u0300" });
+
+    // ÷ 3031 × 200D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u200D",
+                     new String[] { "\u3031\u200D" });
+
+    // รท 3031 ร— 0308 ร— 200D รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] ZERO WIDTH JOINER (ZWJ_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u200D",
+                     new String[] { "\u3031\u0308\u200D" });
+
+    // รท 3031 รท 0061 ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
+                     new String[] { "\u3031", "\u0061\u2060" });
+
+    // รท 3031 ร— 0308 รท 0061 ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
+                     new String[] { "\u3031\u0308", "\u0061\u2060" });
+
+    // รท 3031 รท 0061 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
+                     new String[] { "\u3031", "\u0061" });
+
+    // รท 3031 ร— 0308 รท 0061 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // รท 3031 รท 0061 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
+                     new String[] { "\u3031", "\u0061" });
+
+    // รท 3031 ร— 0308 รท 0061 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // รท 3031 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
+                     new String[] { "\u3031", "\u0061" });
+
+    // รท 3031 ร— 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // รท 3031 รท 0061 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
+                     new String[] { "\u3031", "\u0061" });
+
+    // รท 3031 ร— 0308 รท 0061 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
+                     new String[] { "\u3031\u0308", "\u0061" });
+
+    // รท 3031 รท 0031 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
+                     new String[] { "\u3031", "\u0031" });
+
+    // รท 3031 ร— 0308 รท 0031 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // รท 3031 รท 0031 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
+                     new String[] { "\u3031", "\u0031" });
+
+    // รท 3031 ร— 0308 รท 0031 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // รท 3031 รท 0031 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
+                     new String[] { "\u3031", "\u0031" });
+
+    // รท 3031 ร— 0308 รท 0031 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // รท 3031 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
+                     new String[] { "\u3031", "\u0031" });
+
+    // รท 3031 ร— 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
+                     new String[] { "\u3031\u0308", "\u0031" });
+
+    // รท 0041 รท 0001 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0001",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 0001 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 000D รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\r",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 000D รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\r",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 000A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\n",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 000A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\n",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 000B รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u000B",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 000B รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 3031 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u3031",
+                     new String[] { "\u0041", "\u3031" });
+
+    // รท 0041 ร— 0308 รท 3031 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
+                     new String[] { "\u0041\u0308", "\u3031" });
+
+    // รท 0041 ร— 0041 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0041",
+                     new String[] { "\u0041\u0041" });
+
+    // รท 0041 ร— 0308 ร— 0041 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
+                     new String[] { "\u0041\u0308\u0041" });
+
+    // รท 0041 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u003A",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u002C",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 002E รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u002E",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 002E รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 ร— 0030 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [9.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0030",
+                     new String[] { "\u0041\u0030" });
+
+    // รท 0041 ร— 0308 ร— 0030 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [9.0] DIGIT ZERO (Numeric) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
+                     new String[] { "\u0041\u0308\u0030" });
+
+    // รท 0041 ร— 005F รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [13.1] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u005F",
+                     new String[] { "\u0041\u005F" });
+
+    // รท 0041 ร— 0308 ร— 005F รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [13.1] LOW LINE (ExtendNumLet) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
+                     new String[] { "\u0041\u0308\u005F" });
+
+    // รท 0041 รท 1F1E6 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 1F1E6 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 ร— 05D0 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u05D0",
+                     new String[] { "\u0041\u05D0" });
+
+    // รท 0041 ร— 0308 ร— 05D0 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
+                     new String[] { "\u0041\u0308\u05D0" });
+
+    // รท 0041 รท 0022 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\"",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 0022 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\"",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0027",
+                     new String[] { "\u0041" });
+
+    // รท 0041 ร— 0308 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
+                     new String[] { "\u0041\u0308" });
+
+    // รท 0041 รท 261D รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u261D",
+                     new String[] { "\u0041", "\u261D" });
+
+    // รท 0041 ร— 0308 รท 261D รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] WHITE UP POINTING INDEX (E_Base) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u261D",
+                     new String[] { "\u0041\u0308", "\u261D" });
+
+    // รท 0041 รท 1F3FB รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDFFB",
+                     new String[] { "\u0041", "\uD83C\uDFFB" });
+
+    // รท 0041 ร— 0308 รท 1F3FB รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] EMOJI MODIFIER FITZPATRICK TYPE-1-2 (E_Modifier) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDFFB",
+                     new String[] { "\u0041\u0308", "\uD83C\uDFFB" });
+
+    // รท 0041 รท 2764 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u2764",
+                     new String[] { "\u0041", "\u2764" });
+
+    // รท 0041 ร— 0308 รท 2764 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEAVY BLACK HEART (Glue_After_Zwj) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\u0308\u2764",
+                     new String[] { "\u0041\u0308", "\u2764" });
+
+    // รท 0041 รท 1F466 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] BOY (EBG) รท [0.3]
+    assertAnalyzesTo(analyzer, "\u0041\uD83D\uDC66",
+                     new String[] { "\u0041", "\uD83D\uDC66" });
+
+    // รท 0041 ร— 0308 รท 1F466 รท  #  รท [0.2] LATIN CAPITA

<TRUNCATED>

[07/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
index a797082..3d964d9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -39,6 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
@@ -69,156 +70,221 @@ public final class UAX29URLEmailTokenizerImpl {
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\1\112\10\110\2\112\2\110\1\112\23\110\1\113\1\17\1\103\1\113"+
-    "\1\75\1\73\1\16\2\76\2\113\1\77\1\57\1\24\1\102\1\65"+
-    "\1\60\1\63\1\62\1\61\1\71\1\70\1\66\1\67\1\64\1\72"+
-    "\1\106\1\110\1\107\1\110\1\101\1\100\1\25\1\30\1\37\1\42"+
-    "\1\36\1\50\1\41\1\32\1\35\1\56\1\51\1\40\1\44\1\46"+
-    "\1\33\1\27\1\54\1\26\1\47\1\31\1\43\1\34\1\55\1\53"+
-    "\1\45\1\52\1\104\1\111\1\105\1\114\1\74\1\114\1\25\1\30"+
-    "\1\37\1\42\1\36\1\50\1\41\1\32\1\35\1\56\1\51\1\40"+
-    "\1\44\1\46\1\33\1\27\1\54\1\26\1\47\1\31\1\43\1\34"+
-    "\1\55\1\53\1\45\1\52\3\114\1\73\1\115\52\0\1\14\2\0"+
-    "\1\3\7\0\1\14\1\0\1\7\2\0\1\14\5\0\27\14\1\0"+
-    "\37\14\1\0\u01ca\14\4\0\14\14\5\0\1\7\10\0\5\14\7\0"+
-    "\1\14\1\0\1\14\21\0\160\117\5\14\1\0\2\14\2\0\4\14"+
-    "\1\10\7\0\1\14\1\7\3\14\1\0\1\14\1\0\24\14\1\0"+
-    "\123\14\1\0\213\14\1\0\7\117\236\14\11\0\46\14\2\0\1\14"+
-    "\7\0\47\14\1\0\1\10\7\0\55\117\1\0\1\117\1\0\2\117"+
-    "\1\0\2\117\1\0\1\117\10\0\33\20\5\0\3\20\1\1\1\7"+
-    "\13\0\5\3\7\0\2\10\2\0\13\117\1\0\1\3\3\0\53\14"+
-    "\25\117\12\4\1\0\1\5\1\10\1\0\2\14\1\117\143\14\1\0"+
-    "\1\14\7\117\1\3\1\0\6\117\2\14\2\117\1\0\4\117\2\14"+
-    "\12\4\3\14\2\0\1\14\17\0\1\3\1\14\1\117\36\14\33\117"+
-    "\2\0\131\14\13\117\1\14\16\0\12\4\41\14\11\117\2\14\2\0"+
-    "\1\10\1\0\1\14\5\0\26\14\4\117\1\14\11\117\1\14\3\117"+
-    "\1\14\5\117\22\0\31\14\3\117\104\0\1\14\1\0\13\14\67\0"+
-    "\33\117\1\0\4\117\66\14\3\117\1\14\22\117\1\14\7\117\12\14"+
-    "\2\117\2\0\12\4\1\0\7\14\1\0\7\14\1\0\3\117\1\0"+
-    "\10\14\2\0\2\14\2\0\26\14\1\0\7\14\1\0\1\14\3\0"+
-    "\4\14\2\0\1\117\1\14\7\117\2\0\2\117\2\0\3\117\1\14"+
-    "\10\0\1\117\4\0\2\14\1\0\3\14\2\117\2\0\12\4\2\14"+
-    "\17\0\3\117\1\0\6\14\4\0\2\14\2\0\26\14\1\0\7\14"+
-    "\1\0\2\14\1\0\2\14\1\0\2\14\2\0\1\117\1\0\5\117"+
-    "\4\0\2\117\2\0\3\117\3\0\1\117\7\0\4\14\1\0\1\14"+
-    "\7\0\12\4\2\117\3\14\1\117\13\0\3\117\1\0\11\14\1\0"+
-    "\3\14\1\0\26\14\1\0\7\14\1\0\2\14\1\0\5\14\2\0"+
-    "\1\117\1\14\10\117\1\0\3\117\1\0\3\117\2\0\1\14\17\0"+
-    "\2\14\2\117\2\0\12\4\21\0\3\117\1\0\10\14\2\0\2\14"+
-    "\2\0\26\14\1\0\7\14\1\0\2\14\1\0\5\14\2\0\1\117"+
-    "\1\14\7\117\2\0\2\117\2\0\3\117\10\0\2\117\4\0\2\14"+
-    "\1\0\3\14\2\117\2\0\12\4\1\0\1\14\20\0\1\117\1\14"+
-    "\1\0\6\14\3\0\3\14\1\0\4\14\3\0\2\14\1\0\1\14"+
-    "\1\0\2\14\3\0\2\14\3\0\3\14\3\0\14\14\4\0\5\117"+
-    "\3\0\3\117\1\0\4\117\2\0\1\14\6\0\1\117\16\0\12\4"+
-    "\21\0\3\117\1\0\10\14\1\0\3\14\1\0\27\14\1\0\12\14"+
-    "\1\0\5\14\3\0\1\14\7\117\1\0\3\117\1\0\4\117\7\0"+
-    "\2\117\1\0\2\14\6\0\2\14\2\117\2\0\12\4\22\0\2\117"+
-    "\1\0\10\14\1\0\3\14\1\0\27\14\1\0\12\14\1\0\5\14"+
-    "\2\0\1\117\1\14\7\117\1\0\3\117\1\0\4\117\7\0\2\117"+
-    "\7\0\1\14\1\0\2\14\2\117\2\0\12\4\1\0\2\14\17\0"+
-    "\2\117\1\0\10\14\1\0\3\14\1\0\51\14\2\0\1\14\7\117"+
-    "\1\0\3\117\1\0\4\117\1\14\10\0\1\117\10\0\2\14\2\117"+
-    "\2\0\12\4\12\0\6\14\2\0\2\117\1\0\22\14\3\0\30\14"+
-    "\1\0\11\14\1\0\1\14\2\0\7\14\3\0\1\117\4\0\6\117"+
-    "\1\0\1\117\1\0\10\117\22\0\2\117\15\0\60\123\1\23\2\123"+
-    "\7\23\5\0\7\123\10\23\1\0\12\4\47\0\2\123\1\0\1\123"+
-    "\2\0\2\123\1\0\1\123\2\0\1\123\6\0\4\123\1\0\7\123"+
-    "\1\0\3\123\1\0\1\123\1\0\1\123\2\0\2\123\1\0\4\123"+
-    "\1\23\2\123\6\23\1\0\2\23\1\123\2\0\5\123\1\0\1\123"+
-    "\1\0\6\23\2\0\12\4\2\0\4\123\40\0\1\14\27\0\2\117"+
-    "\6\0\12\4\13\0\1\117\1\0\1\117\1\0\1\117\4\0\2\117"+
-    "\10\14\1\0\44\14\4\0\24\117\1\0\2\117\5\14\13\117\1\0"+
-    "\44\117\11\0\1\117\71\0\53\123\24\23\1\123\12\4\6\0\6\123"+
-    "\4\23\4\123\3\23\1\123\3\23\2\123\7\23\3\123\4\23\15\123"+
-    "\14\23\1\123\1\23\12\4\4\23\2\22\46\14\1\0\1\14\5\0"+
-    "\1\14\2\0\53\14\1\0\4\14\u0100\2\111\14\1\0\4\14\2\0"+
-    "\7\14\1\0\1\14\1\0\4\14\2\0\51\14\1\0\4\14\2\0"+
-    "\41\14\1\0\4\14\2\0\7\14\1\0\1\14\1\0\4\14\2\0"+
-    "\17\14\1\0\71\14\1\0\4\14\2\0\103\14\2\0\3\117\40\0"+
-    "\20\14\20\0\125\14\14\0\u026c\14\2\0\21\14\1\0\32\14\5\0"+
-    "\113\14\3\0\3\14\17\0\15\14\1\0\4\14\3\117\13\0\22\14"+
-    "\3\117\13\0\22\14\2\117\14\0\15\14\1\0\3\14\1\0\2\117"+
-    "\14\0\64\123\40\23\3\0\1\123\4\0\1\123\1\23\2\0\12\4"+
-    "\41\0\3\117\1\3\1\0\12\4\6\0\130\14\10\0\51\14\1\117"+
-    "\1\14\5\0\106\14\12\0\35\14\3\0\14\117\4\0\14\117\12\0"+
-    "\12\4\36\123\2\0\5\123\13\0\54\123\4\0\21\23\7\123\2\23"+
-    "\6\0\12\4\1\22\3\0\2\22\40\0\27\14\5\117\4\0\65\123"+
-    "\12\23\1\0\35\23\2\0\1\117\12\4\6\0\12\4\6\0\7\22"+
-    "\1\123\6\22\122\0\5\117\57\14\21\117\7\14\4\0\12\4\21\0"+
-    "\11\117\14\0\3\117\36\14\15\117\2\14\12\4\54\14\16\117\14\0"+
-    "\44\14\24\117\10\0\12\4\3\0\3\14\12\4\44\14\122\0\3\117"+
-    "\1\0\25\117\4\14\1\117\4\14\3\117\2\14\11\0\300\14\47\117"+
-    "\25\0\4\117\u0116\14\2\0\6\14\2\0\46\14\2\0\6\14\2\0"+
-    "\10\14\1\0\1\14\1\0\1\14\1\0\1\14\1\0\37\14\2\0"+
-    "\65\14\1\0\7\14\1\0\1\14\3\0\3\14\1\0\7\14\3\0"+
-    "\4\14\2\0\6\14\4\0\15\14\5\0\3\14\1\0\7\14\17\0"+
-    "\4\3\10\0\2\11\12\0\1\11\2\0\1\7\2\0\5\3\20\0"+
-    "\2\12\3\0\1\10\17\0\1\12\13\0\5\3\1\0\12\3\1\0"+
-    "\1\14\15\0\1\14\20\0\15\14\63\0\41\117\21\0\1\14\4\0"+
-    "\1\14\2\0\12\14\1\0\1\14\3\0\5\14\6\0\1\14\1\0"+
-    "\1\14\1\0\1\14\1\0\4\14\1\0\13\14\2\0\4\14\5\0"+
-    "\5\14\4\0\1\14\21\0\51\14\u032d\0\64\14\u0716\0\57\14\1\0"+
-    "\57\14\1\0\205\14\6\0\4\14\3\117\2\14\14\0\46\14\1\0"+
-    "\1\14\5\0\1\14\2\0\70\14\7\0\1\14\17\0\1\117\27\14"+
-    "\11\0\7\14\1\0\7\14\1\0\7\14\1\0\7\14\1\0\7\14"+
-    "\1\0\7\14\1\0\7\14\1\0\7\14\1\0\40\117\57\0\1\14"+
-    "\120\0\32\13\1\0\131\13\14\0\326\13\57\0\1\14\1\116\1\121"+
-    "\31\0\11\121\6\117\1\0\5\120\2\0\3\121\1\14\1\14\4\0"+
-    "\126\122\2\0\2\117\2\6\3\122\1\6\132\120\1\0\4\120\5\0"+
-    "\51\14\3\0\136\2\21\0\33\14\65\0\20\120\320\0\57\6\1\0"+
-    "\130\6\250\0\u19b6\121\112\0\u51cd\121\63\0\u048d\14\103\0\56\14\2\0"+
-    "\u010d\14\3\0\20\14\12\4\2\14\24\0\57\14\4\117\1\0\12\117"+
-    "\1\0\31\14\7\0\1\117\120\14\2\117\45\0\11\14\2\0\147\14"+
-    "\2\0\4\14\1\0\4\14\14\0\13\14\115\0\12\14\1\117\3\14"+
-    "\1\117\4\14\1\117\27\14\5\117\30\0\64\14\14\0\2\117\62\14"+
-    "\21\117\13\0\12\4\6\0\22\117\6\14\3\0\1\14\4\0\12\4"+
-    "\34\14\10\117\2\0\27\14\15\117\14\0\35\2\3\0\4\117\57\14"+
-    "\16\117\16\0\1\14\12\4\46\0\51\14\16\117\11\0\3\14\1\117"+
-    "\10\14\2\117\2\0\12\4\6\0\27\123\3\22\1\123\1\23\4\0"+
-    "\60\123\1\23\1\123\3\23\2\123\2\23\5\123\2\23\1\123\1\23"+
-    "\1\123\30\0\3\123\2\22\13\14\5\117\2\0\3\14\2\117\12\0"+
-    "\6\14\2\0\6\14\2\0\6\14\11\0\7\14\1\0\7\14\221\0"+
-    "\43\14\10\117\1\0\2\117\2\0\12\4\6\0\u2ba4\2\14\0\27\2"+
-    "\4\0\61\2\u2104\0\u016e\121\2\0\152\121\46\0\7\14\14\0\5\14"+
-    "\5\0\1\20\1\117\12\20\1\0\15\20\1\0\5\20\1\0\1\20"+
-    "\1\0\2\20\1\0\2\20\1\0\12\20\142\14\41\0\u016b\14\22\0"+
-    "\100\14\2\0\66\14\50\0\14\14\4\0\20\117\1\10\2\0\1\7"+
-    "\1\10\13\0\7\117\14\0\2\12\30\0\3\12\1\10\1\0\1\11"+
-    "\1\0\1\10\1\7\32\0\5\14\1\0\207\14\2\0\1\3\7\0"+
-    "\1\11\4\0\1\10\1\0\1\11\1\0\12\4\1\7\1\10\5\0"+
-    "\32\14\4\0\1\12\1\0\32\14\13\0\70\120\2\117\37\2\3\0"+
-    "\6\2\2\0\6\2\2\0\6\2\2\0\3\2\34\0\3\3\4\0"+
-    "\14\14\1\0\32\14\1\0\23\14\1\0\2\14\1\0\17\14\2\0"+
-    "\16\14\42\0\173\14\105\0\65\14\210\0\1\117\202\0\35\14\3\0"+
-    "\61\14\57\0\37\14\21\0\33\14\65\0\36\14\2\0\44\14\4\0"+
-    "\10\14\1\0\5\14\52\0\236\14\2\0\12\4\u0356\0\6\14\2\0"+
-    "\1\14\1\0\54\14\1\0\2\14\3\0\1\14\2\0\27\14\252\0"+
-    "\26\14\12\0\32\14\106\0\70\14\6\0\2\14\100\0\1\14\3\117"+
-    "\1\0\2\117\5\0\4\117\4\14\1\0\3\14\1\0\33\14\4\0"+
-    "\3\117\4\0\1\117\40\0\35\14\203\0\66\14\12\0\26\14\12\0"+
-    "\23\14\215\0\111\14\u03b7\0\3\117\65\14\17\117\37\0\12\4\20\0"+
-    "\3\117\55\14\13\117\2\0\1\3\22\0\31\14\7\0\12\4\6\0"+
-    "\3\117\44\14\16\117\1\0\12\4\100\0\3\117\60\14\16\117\4\14"+
-    "\13\0\12\4\u04a6\0\53\14\15\117\10\0\12\4\u0936\0\u036f\14\221\0"+
-    "\143\14\u0b9d\0\u042f\14\u33d1\0\u0239\14\u04c7\0\105\14\13\0\1\14\56\117"+
-    "\20\0\4\117\15\14\u4060\0\1\120\1\122\u2163\0\5\117\3\0\6\117"+
-    "\10\3\10\117\2\0\7\117\36\0\4\117\224\0\3\117\u01bb\0\125\14"+
-    "\1\0\107\14\1\0\2\14\2\0\1\14\2\0\2\14\2\0\4\14"+
-    "\1\0\14\14\1\0\1\14\1\0\7\14\1\0\101\14\1\0\4\14"+
-    "\2\0\10\14\1\0\7\14\1\0\34\14\1\0\4\14\1\0\5\14"+
-    "\1\0\1\14\3\0\7\14\1\0\u0154\14\2\0\31\14\1\0\31\14"+
-    "\1\0\37\14\1\0\31\14\1\0\37\14\1\0\31\14\1\0\37\14"+
-    "\1\0\31\14\1\0\37\14\1\0\31\14\1\0\10\14\2\0\62\4"+
-    "\u1600\0\4\14\1\0\33\14\1\0\2\14\1\0\1\14\2\0\1\14"+
-    "\1\0\12\14\1\0\4\14\1\0\1\14\1\0\1\14\6\0\1\14"+
-    "\4\0\1\14\1\0\1\14\1\0\1\14\1\0\3\14\1\0\2\14"+
-    "\1\0\1\14\2\0\1\14\1\0\1\14\1\0\1\14\1\0\1\14"+
-    "\1\0\1\14\1\0\2\14\1\0\1\14\2\0\4\14\1\0\7\14"+
-    "\1\0\4\14\1\0\4\14\1\0\1\14\1\0\12\14\1\0\21\14"+
-    "\5\0\3\14\1\0\5\14\1\0\21\14\u032a\0\32\21\1\15\u0dff\0"+
-    "\ua6d7\121\51\0\u1035\121\13\0\336\121\u3fe2\0\u021e\121\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
-    "\1\3\36\0\140\3\200\0\360\117\uffff\0\uffff\0\ufe12\0";
+    "\1\125\10\123\2\125\2\123\1\125\23\123\1\126\1\34\1\7\1\126"+
+    "\1\110\1\106\1\33\2\111\1\112\1\126\1\113\1\73\1\40\1\116"+
+    "\1\101\1\74\1\77\1\76\1\75\1\22\1\104\1\102\1\103\1\100"+
+    "\1\105\1\121\1\123\1\122\1\123\1\115\1\114\1\41\1\44\1\53"+
+    "\1\56\1\52\1\64\1\55\1\46\1\51\1\72\1\65\1\54\1\60"+
+    "\1\62\1\47\1\43\1\70\1\42\1\63\1\45\1\57\1\50\1\71"+
+    "\1\67\1\61\1\66\1\117\1\124\1\120\1\127\1\107\1\127\1\41"+
+    "\1\44\1\53\1\56\1\52\1\64\1\55\1\46\1\51\1\72\1\65"+
+    "\1\54\1\60\1\62\1\47\1\43\1\70\1\42\1\63\1\45\1\57"+
+    "\1\50\1\71\1\67\1\61\1\66\3\127\1\106\1\130\51\0\1\5"+
+    "\1\31\2\0\1\1\1\5\6\0\1\31\1\0\1\24\2\0\1\31"+
+    "\5\0\27\31\1\0\37\31\1\0\u01ca\31\4\0\14\31\5\0\1\24"+
+    "\10\0\5\31\7\0\1\31\1\0\1\31\21\0\160\132\5\31\1\0"+
+    "\2\31\2\0\4\31\1\25\1\31\6\0\1\31\1\24\3\31\1\0"+
+    "\1\31\1\0\24\31\1\0\123\31\1\0\213\31\1\0\7\132\246\31"+
+    "\1\0\46\31\2\0\1\31\7\0\47\31\1\0\1\25\7\0\55\132"+
+    "\1\0\1\132\1\0\2\132\1\0\2\132\1\0\1\132\10\0\33\35"+
+    "\5\0\3\35\1\15\1\24\13\0\6\1\6\0\2\25\2\0\13\132"+
+    "\1\0\1\1\3\0\53\31\25\132\12\20\1\0\1\21\1\25\1\0"+
+    "\2\31\1\132\143\31\1\0\1\31\7\132\1\1\1\0\6\132\2\31"+
+    "\2\132\1\0\4\132\2\31\12\20\3\31\2\0\1\31\17\0\1\1"+
+    "\1\31\1\132\36\31\33\132\2\0\131\31\13\132\1\31\16\0\12\20"+
+    "\41\31\11\132\2\31\2\0\1\25\1\0\1\31\5\0\26\31\4\132"+
+    "\1\31\11\132\1\31\3\132\1\31\5\132\22\0\31\31\3\132\104\0"+
+    "\25\31\1\0\10\31\26\0\16\132\1\1\41\132\66\31\3\132\1\31"+
+    "\22\132\1\31\7\132\12\31\2\132\2\0\12\20\1\0\20\31\3\132"+
+    "\1\0\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\1\31"+
+    "\3\0\4\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132"+
+    "\1\31\10\0\1\132\4\0\2\31\1\0\3\31\2\132\2\0\12\20"+
+    "\2\31\17\0\3\132\1\0\6\31\4\0\2\31\2\0\26\31\1\0"+
+    "\7\31\1\0\2\31\1\0\2\31\1\0\2\31\2\0\1\132\1\0"+
+    "\5\132\4\0\2\132\2\0\3\132\3\0\1\132\7\0\4\31\1\0"+
+    "\1\31\7\0\12\20\2\132\3\31\1\132\13\0\3\132\1\0\11\31"+
+    "\1\0\3\31\1\0\26\31\1\0\7\31\1\0\2\31\1\0\5\31"+
+    "\2\0\1\132\1\31\10\132\1\0\3\132\1\0\3\132\2\0\1\31"+
+    "\17\0\2\31\2\132\2\0\12\20\11\0\1\31\7\0\3\132\1\0"+
+    "\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\2\31\1\0"+
+    "\5\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132\10\0"+
+    "\2\132\4\0\2\31\1\0\3\31\2\132\2\0\12\20\1\0\1\31"+
+    "\20\0\1\132\1\31\1\0\6\31\3\0\3\31\1\0\4\31\3\0"+
+    "\2\31\1\0\1\31\1\0\2\31\3\0\2\31\3\0\3\31\3\0"+
+    "\14\31\4\0\5\132\3\0\3\132\1\0\4\132\2\0\1\31\6\0"+
+    "\1\132\16\0\12\20\20\0\4\132\1\0\10\31\1\0\3\31\1\0"+
+    "\27\31\1\0\20\31\3\0\1\31\7\132\1\0\3\132\1\0\4\132"+
+    "\7\0\2\132\1\0\3\31\5\0\2\31\2\132\2\0\12\20\20\0"+
+    "\1\31\3\132\1\0\10\31\1\0\3\31\1\0\27\31\1\0\12\31"+
+    "\1\0\5\31\2\0\1\132\1\31\7\132\1\0\3\132\1\0\4\132"+
+    "\7\0\2\132\7\0\1\31\1\0\2\31\2\132\2\0\12\20\1\0"+
+    "\2\31\16\0\3\132\1\0\10\31\1\0\3\31\1\0\51\31\2\0"+
+    "\1\31\7\132\1\0\3\132\1\0\4\132\1\31\5\0\3\31\1\132"+
+    "\7\0\3\31\2\132\2\0\12\20\12\0\6\31\2\0\2\132\1\0"+
+    "\22\31\3\0\30\31\1\0\11\31\1\0\1\31\2\0\7\31\3\0"+
+    "\1\132\4\0\6\132\1\0\1\132\1\0\10\132\6\0\12\20\2\0"+
+    "\2\132\15\0\60\137\1\37\2\137\7\37\5\0\7\137\10\37\1\0"+
+    "\12\20\47\0\2\137\1\0\1\137\2\0\2\137\1\0\1\137\2\0"+
+    "\1\137\6\0\4\137\1\0\7\137\1\0\3\137\1\0\1\137\1\0"+
+    "\1\137\2\0\2\137\1\0\4\137\1\37\2\137\6\37\1\0\2\37"+
+    "\1\137\2\0\5\137\1\0\1\137\1\0\6\37\2\0\12\20\2\0"+
+    "\4\137\40\0\1\31\27\0\2\132\6\0\12\20\13\0\1\132\1\0"+
+    "\1\132\1\0\1\132\4\0\2\132\10\31\1\0\44\31\4\0\24\132"+
+    "\1\0\2\132\5\31\13\132\1\0\44\132\11\0\1\132\71\0\53\137"+
+    "\24\37\1\137\12\20\6\0\6\137\4\37\4\137\3\37\1\137\3\37"+
+    "\2\137\7\37\3\137\4\37\15\137\14\37\1\137\1\37\12\20\4\37"+
+    "\2\36\46\31\1\0\1\31\5\0\1\31\2\0\53\31\1\0\4\31"+
+    "\u0100\17\111\31\1\0\4\31\2\0\7\31\1\0\1\31\1\0\4\31"+
+    "\2\0\51\31\1\0\4\31\2\0\41\31\1\0\4\31\2\0\7\31"+
+    "\1\0\1\31\1\0\4\31\2\0\17\31\1\0\71\31\1\0\4\31"+
+    "\2\0\103\31\2\0\3\132\40\0\20\31\20\0\126\31\2\0\6\31"+
+    "\3\0\u026c\31\2\0\21\31\1\0\32\31\5\0\113\31\3\0\13\31"+
+    "\7\0\15\31\1\0\4\31\3\132\13\0\22\31\3\132\13\0\22\31"+
+    "\2\132\14\0\15\31\1\0\3\31\1\0\2\132\14\0\64\137\40\37"+
+    "\3\0\1\137\4\0\1\137\1\37\2\0\12\20\41\0\3\132\1\1"+
+    "\1\0\12\20\6\0\130\31\10\0\5\31\2\132\42\31\1\132\1\31"+
+    "\5\0\106\31\12\0\37\31\1\0\14\132\4\0\14\132\12\0\12\20"+
+    "\36\137\2\0\5\137\13\0\54\137\4\0\32\137\6\0\12\20\1\36"+
+    "\3\0\2\36\40\0\27\31\5\132\4\0\65\137\12\37\1\0\35\37"+
+    "\2\0\1\132\12\20\6\0\12\20\6\0\7\36\1\137\6\36\2\0"+
+    "\17\132\101\0\5\132\57\31\21\132\7\31\4\0\12\20\21\0\11\132"+
+    "\14\0\3\132\36\31\15\132\2\31\12\20\54\31\16\132\14\0\44\31"+
+    "\24\132\10\0\12\20\3\0\3\31\12\20\44\31\2\0\11\31\107\0"+
+    "\3\132\1\0\25\132\4\31\1\132\4\31\3\132\2\31\1\0\2\132"+
+    "\6\0\300\31\66\132\5\0\5\132\u0116\31\2\0\6\31\2\0\46\31"+
+    "\2\0\6\31\2\0\10\31\1\0\1\31\1\0\1\31\1\0\1\31"+
+    "\1\0\37\31\2\0\65\31\1\0\7\31\1\0\1\31\3\0\3\31"+
+    "\1\0\7\31\3\0\4\31\2\0\6\31\4\0\15\31\5\0\3\31"+
+    "\1\0\7\31\17\0\1\1\1\12\2\1\10\0\2\26\12\0\1\26"+
+    "\2\0\1\24\2\0\5\1\1\27\14\0\1\5\2\0\2\134\3\0"+
+    "\1\25\4\0\1\5\12\0\1\134\13\0\5\1\1\0\12\1\1\0"+
+    "\1\31\15\0\1\31\20\0\15\31\63\0\23\132\1\10\15\132\21\0"+
+    "\1\31\4\0\1\31\2\0\12\31\1\0\1\31\3\0\5\31\4\0"+
+    "\1\5\1\0\1\31\1\0\1\31\1\0\1\31\1\0\4\31\1\0"+
+    "\12\31\1\16\2\0\4\31\5\0\5\31\4\0\1\31\21\0\51\31"+
+    "\13\0\6\5\17\0\2\5\u016f\0\2\5\14\0\1\5\137\0\1\5"+
+    "\106\0\1\5\31\0\13\5\4\0\3\5\273\0\14\31\1\16\47\31"+
+    "\300\0\2\5\12\0\1\5\11\0\1\5\72\0\4\5\1\0\5\5"+
+    "\1\5\1\0\7\5\1\5\2\5\1\5\1\5\1\0\2\5\2\5"+
+    "\1\5\4\5\1\4\2\5\1\5\1\5\2\5\2\5\1\5\3\5"+
+    "\1\5\3\5\2\5\10\5\3\5\5\5\1\5\1\5\1\5\5\5"+
+    "\14\5\13\5\2\5\2\5\1\5\1\5\2\5\1\5\1\5\22\5"+
+    "\1\5\2\5\2\5\6\5\12\0\2\5\6\5\1\5\1\5\1\5"+
+    "\2\5\3\5\2\5\10\5\2\5\4\5\2\5\13\5\2\5\5\5"+
+    "\2\5\2\5\1\5\5\5\2\5\1\5\1\5\1\5\2\5\24\5"+
+    "\2\5\5\5\6\5\1\5\2\5\1\4\1\5\2\5\1\5\4\5"+
+    "\1\5\2\5\1\5\2\0\2\5\4\4\1\5\1\5\2\5\1\5"+
+    "\1\0\1\5\1\0\1\5\6\0\1\5\3\0\1\5\6\0\1\5"+
+    "\12\0\2\5\17\0\1\5\2\0\1\5\4\0\1\5\1\0\1\5"+
+    "\4\0\3\5\1\0\1\5\13\0\2\5\3\5\55\0\3\5\11\0"+
+    "\1\5\16\0\1\5\16\0\1\5\u0174\0\2\5\u01cf\0\3\5\23\0"+
+    "\2\5\63\0\1\5\4\0\1\5\252\0\57\31\1\0\57\31\1\0"+
+    "\205\31\6\0\4\31\3\132\2\31\14\0\46\31\1\0\1\31\5\0"+
+    "\1\31\2\0\70\31\7\0\1\31\17\0\1\132\27\31\11\0\7\31"+
+    "\1\0\7\31\1\0\7\31\1\0\7\31\1\0\7\31\1\0\7\31"+
+    "\1\0\7\31\1\0\7\31\1\0\40\132\57\0\1\31\120\0\32\30"+
+    "\1\0\131\30\14\0\326\30\57\0\1\31\1\131\1\135\31\0\11\135"+
+    "\6\132\1\5\5\133\2\0\3\135\1\31\1\31\1\5\3\0\126\136"+
+    "\2\0\2\132\2\23\3\136\1\23\132\133\1\0\4\133\5\0\51\31"+
+    "\3\0\136\17\21\0\33\31\65\0\20\133\227\0\1\5\1\0\1\5"+
+    "\66\0\57\23\1\0\130\23\250\0\u19b6\135\112\0\u51d6\135\52\0\u048d\31"+
+    "\103\0\56\31\2\0\u010d\31\3\0\20\31\12\20\2\31\24\0\57\31"+
+    "\4\132\1\0\12\132\1\0\37\31\2\132\120\31\2\132\45\0\11\31"+
+    "\2\0\147\31\2\0\44\31\1\0\10\31\77\0\13\31\1\132\3\31"+
+    "\1\132\4\31\1\132\27\31\5\132\30\0\64\31\14\0\2\132\62\31"+
+    "\22\132\12\0\12\20\6\0\22\132\6\31\3\0\1\31\1\0\1\31"+
+    "\2\0\12\20\34\31\10\132\2\0\27\31\15\132\14\0\35\17\3\0"+
+    "\4\132\57\31\16\132\16\0\1\31\12\20\6\0\5\137\1\37\12\137"+
+    "\12\20\5\137\1\0\51\31\16\132\11\0\3\31\1\132\10\31\2\132"+
+    "\2\0\12\20\6\0\27\137\3\36\1\137\3\37\62\137\1\37\1\137"+
+    "\3\37\2\137\2\37\5\137\2\37\1\137\1\37\1\137\30\0\3\137"+
+    "\2\36\13\31\5\132\2\0\3\31\2\132\12\0\6\31\2\0\6\31"+
+    "\2\0\6\31\11\0\7\31\1\0\7\31\1\0\53\31\1\0\12\31"+
+    "\12\0\163\31\10\132\1\0\2\132\2\0\12\20\6\0\u2ba4\17\14\0"+
+    "\27\17\4\0\61\17\u2104\0\u016e\135\2\0\152\135\46\0\7\31\14\0"+
+    "\5\31\5\0\1\35\1\132\12\35\1\0\15\35\1\0\5\35\1\0"+
+    "\1\35\1\0\2\35\1\0\2\35\1\0\12\35\142\31\41\0\u016b\31"+
+    "\22\0\100\31\2\0\66\31\50\0\14\31\4\0\16\132\1\6\1\11"+
+    "\1\25\2\0\1\24\1\25\13\0\20\132\3\0\2\134\30\0\3\134"+
+    "\1\25\1\0\1\26\1\0\1\25\1\24\32\0\5\31\1\0\207\31"+
+    "\2\0\1\1\7\0\1\26\4\0\1\25\1\0\1\26\1\0\12\20"+
+    "\1\24\1\25\5\0\32\31\4\0\1\134\1\0\32\31\13\0\70\133"+
+    "\2\132\37\17\3\0\6\17\2\0\6\17\2\0\6\17\2\0\3\17"+
+    "\34\0\3\1\4\0\14\31\1\0\32\31\1\0\23\31\1\0\2\31"+
+    "\1\0\17\31\2\0\16\31\42\0\173\31\105\0\65\31\210\0\1\132"+
+    "\202\0\35\31\3\0\61\31\17\0\1\132\37\0\40\31\20\0\33\31"+
+    "\5\0\46\31\5\132\5\0\36\31\2\0\44\31\4\0\10\31\1\0"+
+    "\5\31\52\0\236\31\2\0\12\20\6\0\44\31\4\0\44\31\4\0"+
+    "\50\31\10\0\64\31\234\0\u0137\31\11\0\26\31\12\0\10\31\230\0"+
+    "\6\31\2\0\1\31\1\0\54\31\1\0\2\31\3\0\1\31\2\0"+
+    "\27\31\12\0\27\31\11\0\37\31\101\0\23\31\1\0\2\31\12\0"+
+    "\26\31\12\0\32\31\106\0\70\31\6\0\2\31\100\0\1\31\3\132"+
+    "\1\0\2\132\5\0\4\132\4\31\1\0\3\31\1\0\33\31\4\0"+
+    "\3\132\4\0\1\132\40\0\35\31\3\0\35\31\43\0\10\31\1\0"+
+    "\34\31\2\132\31\0\66\31\12\0\26\31\12\0\23\31\15\0\22\31"+
+    "\156\0\111\31\67\0\63\31\15\0\63\31\u030d\0\3\132\65\31\17\132"+
+    "\37\0\12\20\17\0\4\132\55\31\13\132\2\0\1\1\22\0\31\31"+
+    "\7\0\12\20\6\0\3\132\44\31\16\132\1\0\12\20\20\0\43\31"+
+    "\1\132\2\0\1\31\11\0\3\132\60\31\16\132\4\31\5\0\3\132"+
+    "\3\0\12\20\1\31\1\0\1\31\43\0\22\31\1\0\31\31\14\132"+
+    "\6\0\1\132\101\0\7\31\1\0\1\31\1\0\4\31\1\0\17\31"+
+    "\1\0\12\31\7\0\57\31\14\132\5\0\12\20\6\0\4\132\1\0"+
+    "\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\2\31\1\0"+
+    "\5\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132\2\0"+
+    "\1\31\6\0\1\132\5\0\5\31\2\132\2\0\7\132\3\0\5\132"+
+    "\213\0\65\31\22\132\4\31\5\0\12\20\46\0\60\31\24\132\2\31"+
+    "\1\0\1\31\10\0\12\20\246\0\57\31\7\132\2\0\11\132\27\0"+
+    "\4\31\2\132\42\0\60\31\21\132\3\0\1\31\13\0\12\20\46\0"+
+    "\53\31\15\132\10\0\12\20\66\0\32\137\3\0\17\37\4\0\12\20"+
+    "\2\36\3\0\1\36\u0160\0\100\31\12\20\25\0\1\31\u01c0\0\71\31"+
+    "\u0107\0\11\31\1\0\45\31\10\132\1\0\10\132\1\31\17\0\12\20"+
+    "\30\0\36\31\2\0\26\132\1\0\16\132\u0349\0\u039a\31\146\0\157\31"+
+    "\21\0\304\31\u0abc\0\u042f\31\u0fd1\0\u0247\31\u21b9\0\u0239\31\7\0\37\31"+
+    "\1\0\12\20\146\0\36\31\2\0\5\132\13\0\60\31\7\132\11\0"+
+    "\4\31\14\0\12\20\11\0\25\31\5\0\23\31\u0370\0\105\31\13\0"+
+    "\1\31\56\132\20\0\4\132\15\31\100\0\1\31\37\0\u17ed\131\23\0"+
+    "\u02f3\131\u250d\0\1\133\1\136\u0bfe\0\153\31\5\0\15\31\3\0\11\31"+
+    "\7\0\12\31\3\0\2\132\1\0\4\1\u14c1\0\5\132\3\0\6\132"+
+    "\10\1\10\132\2\0\7\132\36\0\4\132\224\0\3\132\u01bb\0\125\31"+
+    "\1\0\107\31\1\0\2\31\2\0\1\31\2\0\2\31\2\0\4\31"+
+    "\1\0\14\31\1\0\1\31\1\0\7\31\1\0\101\31\1\0\4\31"+
+    "\2\0\10\31\1\0\7\31\1\0\34\31\1\0\4\31\1\0\5\31"+
+    "\1\0\1\31\3\0\7\31\1\0\u0154\31\2\0\31\31\1\0\31\31"+
+    "\1\0\37\31\1\0\31\31\1\0\37\31\1\0\31\31\1\0\37\31"+
+    "\1\0\31\31\1\0\37\31\1\0\31\31\1\0\10\31\2\0\62\20"+
+    "\u0200\0\67\132\4\0\62\132\10\0\1\132\16\0\1\132\26\0\5\132"+
+    "\1\0\17\132\u0550\0\7\132\1\0\21\132\2\0\7\132\1\0\2\132"+
+    "\1\0\5\132\u07d5\0\305\31\13\0\7\132\51\0\104\31\7\132\5\0"+
+    "\12\20\u04a6\0\4\31\1\0\33\31\1\0\2\31\1\0\1\31\2\0"+
+    "\1\31\1\0\12\31\1\0\4\31\1\0\1\31\1\0\1\31\6\0"+
+    "\1\31\4\0\1\31\1\0\1\31\1\0\1\31\1\0\3\31\1\0"+
+    "\2\31\1\0\1\31\2\0\1\31\1\0\1\31\1\0\1\31\1\0"+
+    "\1\31\1\0\1\31\1\0\2\31\1\0\1\31\2\0\4\31\1\0"+
+    "\7\31\1\0\4\31\1\0\4\31\1\0\1\31\1\0\12\31\1\0"+
+    "\21\31\5\0\3\31\1\0\5\31\1\0\21\31\u0144\0\4\5\1\5"+
+    "\312\5\1\5\60\5\15\0\3\5\37\0\1\5\32\31\6\0\32\31"+
+    "\2\0\4\5\2\16\14\31\2\16\12\31\4\0\1\5\2\0\12\5"+
+    "\22\0\71\5\32\2\1\32\2\5\15\5\12\0\1\5\24\0\1\5"+
+    "\2\0\11\5\1\0\4\5\11\0\7\5\2\5\256\5\42\5\2\5"+
+    "\141\5\1\4\16\5\2\5\2\5\1\5\3\5\2\5\44\5\3\4"+
+    "\2\5\1\4\2\5\3\4\44\5\2\5\3\5\1\5\4\5\5\3"+
+    "\102\5\2\4\2\5\13\4\25\5\4\4\4\5\1\4\1\5\11\4"+
+    "\3\5\1\4\4\5\3\4\1\5\3\4\42\5\1\4\123\5\1\5"+
+    "\77\5\10\0\3\5\6\5\1\5\30\5\7\5\2\5\2\5\1\5"+
+    "\2\4\4\5\1\4\14\5\1\5\2\5\4\5\2\5\1\4\4\5"+
+    "\2\4\15\5\2\5\2\5\1\5\10\5\2\5\11\5\1\5\5\5"+
+    "\3\5\14\5\3\5\10\5\3\5\2\5\1\5\1\5\1\5\4\5"+
+    "\1\5\6\5\1\5\3\5\1\5\6\5\113\5\3\4\3\5\5\4"+
+    "\60\0\43\5\1\4\20\5\3\4\11\5\1\4\5\5\5\5\1\5"+
+    "\1\4\6\5\15\5\6\5\3\5\1\5\1\5\2\5\3\5\1\5"+
+    "\2\5\7\5\6\5\164\0\14\5\125\0\53\5\14\0\4\5\70\0"+
+    "\10\5\12\0\6\5\50\0\10\5\36\0\122\5\14\0\4\5\10\5"+
+    "\5\4\1\5\2\4\6\5\1\4\11\5\12\4\1\5\1\0\1\5"+
+    "\2\4\1\5\6\5\1\0\52\5\2\5\4\5\3\5\1\5\1\5"+
+    "\47\5\15\5\5\5\2\4\1\5\2\4\6\5\3\5\15\5\1\5"+
+    "\15\4\42\5\u05fe\5\2\0\ua6d7\135\51\0\u1035\135\13\0\336\135\2\0"+
+    "\u1682\135\u295e\0\u021e\135\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
+    "\1\1\36\0\137\13\1\14\200\0\360\132\uffff\0\uffff\0\ufe12\0";
 
   /** 
    * Translates characters to character classes
@@ -231,27 +297,85 @@ public final class UAX29URLEmailTokenizerImpl {
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\2\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
-    "\1\7\2\1\1\2\1\1\1\10\4\2\3\4\2\1"+
-    "\4\2\3\4\1\1\1\2\1\0\1\2\1\0\1\4"+
-    "\1\0\1\2\6\0\1\2\2\0\1\1\3\0\6\2"+
-    "\2\0\3\4\1\2\1\4\5\0\5\2\1\0\2\4"+
-    "\6\0\32\2\3\0\5\2\32\0\4\4\5\0\32\2"+
-    "\2\0\4\2\32\0\4\4\5\0\1\11\1\0\1\2"+
-    "\1\12\2\2\2\12\1\2\10\12\1\2\2\12\1\2"+
-    "\4\12\1\2\1\12\1\2\2\12\2\2\6\12\2\2"+
-    "\1\12\1\2\1\12\1\2\11\12\3\2\3\12\1\2"+
-    "\3\12\2\2\2\12\1\2\3\12\7\2\1\12\10\2"+
-    "\1\12\4\2\1\12\1\2\2\12\1\2\2\12\2\2"+
-    "\1\12\1\2\1\12\1\2\3\12\3\2\1\12\1\2"+
-    "\1\12\1\2\1\12\1\2\1\12\2\2\1\12\2\2"+
-    "\2\12\1\2\3\12\1\2\5\12\1\2\1\12\1\2"+
-    "\1\12\3\2\4\12\1\2\6\12\4\2\1\12\2\2"+
-    "\1\12\6\2\1\12\1\2\2\12\1\2\5\12\3\2"+
-    "\2\12\2\2\5\12\1\2\14\12\1\2\1\12\2\2"+
-    "\3\12\5\2\2\12\1\2\2\12\2\2\2\12\26\2"+
-    "\3\12\3\2\1\12\1\2\2\0\2\2\1\0\1\2"+
-    "\1\0\1\12\2\0\2\12\1\0\10\12\1\0\2\12"+
+    "\2\0\2\1\3\2\2\1\1\3\1\2\1\4\2\5"+
+    "\1\6\1\1\1\7\1\10\2\1\1\3\1\11\4\3"+
+    "\2\5\2\1\1\5\4\3\2\5\1\1\1\0\5\2"+
+    "\2\0\1\2\4\0\1\3\1\0\1\3\2\2\1\0"+
+    "\2\5\1\2\2\5\3\0\1\5\1\0\1\3\3\0"+
+    "\1\3\1\0\1\3\2\0\5\3\3\5\1\3\3\0"+
+    "\1\5\2\0\1\3\1\0\4\3\2\5\2\0\2\2"+
+    "\2\0\1\2\3\0\2\3\2\2\2\5\32\0\2\5"+
+    "\4\0\37\3\5\0\2\5\32\0\2\5\2\0\36\3"+
+    "\3\0\3\2\1\0\1\2\2\0\1\12\1\0\1\3"+
+    "\3\2\1\0\1\5\3\0\1\13\2\0\2\13\1\0"+
+    "\10\13\1\0\2\13\1\0\4\13\1\0\1\13\1\0"+
+    "\2\13\2\0\6\13\2\0\1\13\1\0\1\13\1\0"+
+    "\11\13\3\0\3\13\1\0\3\13\2\0\2\13\1\0"+
+    "\3\13\7\0\1\13\10\0\1\13\4\0\1\13\1\0"+
+    "\2\13\1\0\2\13\2\0\1\13\1\0\1\13\1\0"+
+    "\3\13\3\0\1\13\1\0\1\13\1\0\1\13\1\0"+
+    "\1\13\2\0\1\13\2\0\2\13\1\0\3\13\1\0"+
+    "\5\13\1\0\1\13\1\0\1\13\3\0\4\13\1\0"+
+    "\6\13\4\0\1\13\2\0\1\13\6\0\1\13\1\0"+
+    "\2\13\1\0\5\13\3\0\2\13\2\0\5\13\1\0"+
+    "\14\13\1\0\1\13\2\0\3\13\5\0\2\13\1\0"+
+    "\2\13\2\0\2\13\26\0\3\13\3\0\1\13\1\0"+
+    "\1\5\1\3\1\13\2\3\2\13\1\3\10\13\1\3"+
+    "\2\13\1\3\4\13\1\3\1\13\1\3\2\13\2\3"+
+    "\6\13\2\3\1\13\1\3\1\13\1\3\11\13\3\3"+
+    "\3\13\1\3\3\13\2\3\2\13\1\3\3\13\7\3"+
+    "\1\13\10\3\1\13\4\3\1\13\1\3\2\13\1\3"+
+    "\2\13\2\3\1\13\1\3\1\13\1\3\3\13\3\3"+
+    "\1\13\1\3\1\13\1\3\1\13\1\3\1\13\2\3"+
+    "\1\13\2\3\2\13\1\3\3\13\1\3\5\13\1\3"+
+    "\1\13\1\3\1\13\3\3\4\13\1\3\6\13\4\3"+
+    "\1\13\2\3\1\13\6\3\1\13\1\3\2\13\1\3"+
+    "\5\13\3\3\2\13\2\3\5\13\1\3\14\13\1\3"+
+    "\1\13\2\3\3\13\5\3\2\13\1\3\2\13\2\3"+
+    "\2\13\26\3\3\13\3\3\1\13\1\3\1\0\2\3"+
+    "\1\0\1\3\7\0\1\13\1\0\1\5\375\0\1\5"+
+    "\376\3\42\0\3\5\1\14\1\15\1\16\1\15\2\14"+
+    "\5\15\1\14\1\15\1\14\2\16\1\0\1\13\1\0"+
+    "\1\13\2\0\2\15\1\0\26\15\3\0\5\15\3\0"+
+    "\4\15\2\0\7\15\2\13\4\15\1\0\6\15\1\13"+
+    "\2\15\1\13\4\15\5\0\1\13\2\0\3\15\1\13"+
+    "\13\15\1\13\12\15\6\0\1\15\3\0\13\15\5\0"+
+    "\1\13\4\0\2\15\3\0\1\13\14\0\1\15\4\0"+
+    "\1\13\4\0\1\15\1\13\2\0\1\13\1\0\2\15"+
+    "\3\0\11\15\1\0\2\15\1\0\1\15\1\0\3\15"+
+    "\1\13\4\15\1\13\2\15\2\0\2\15\1\0\1\15"+
+    "\1\0\1\13\1\15\5\0\2\15\4\0\1\13\1\15"+
+    "\3\13\21\15\1\13\6\15\1\0\7\15\1\13\5\15"+
+    "\2\13\4\0\12\15\4\0\4\15\1\13\6\15\1\13"+
+    "\2\0\12\15\4\0\4\15\5\0\6\15\7\0\5\15"+
+    "\1\13\6\15\1\13\2\15\1\0\1\13\2\0\4\15"+
+    "\1\13\1\15\1\13\3\15\5\0\1\13\1\0\5\15"+
+    "\1\13\3\15\1\13\2\15\1\13\5\15\3\0\3\15"+
+    "\1\13\20\15\1\13\16\15\1\13\6\0\3\15\1\13"+
+    "\2\15\1\13\3\15\11\0\1\15\3\0\3\15\1\0"+
+    "\2\15\1\13\16\0\1\13\3\0\1\13\1\0\5\15"+
+    "\2\0\1\14\2\15\1\14\1\15\3\14\1\15\1\14"+
+    "\5\15\1\14\2\15\1\3\1\13\1\3\1\13\2\3"+
+    "\2\15\1\3\26\15\3\3\5\15\3\3\4\15\2\3"+
+    "\7\15\2\13\4\15\1\3\6\15\1\13\2\15\1\13"+
+    "\4\15\5\3\1\13\2\3\3\15\1\13\13\15\1\13"+
+    "\12\15\6\3\1\15\3\3\13\15\5\3\1\13\4\3"+
+    "\2\15\3\3\1\13\14\3\1\15\4\3\1\13\4\3"+
+    "\1\15\1\13\2\3\1\13\1\3\2\15\3\3\11\15"+
+    "\1\3\2\15\1\3\1\15\1\3\3\15\1\13\4\15"+
+    "\1\13\2\15\2\3\2\15\1\3\1\15\1\3\1\13"+
+    "\1\15\5\3\2\15\4\3\1\13\1\15\3\13\21\15"+
+    "\1\13\6\15\1\3\7\15\1\13\5\15\2\13\4\3"+
+    "\12\15\4\3\4\15\1\13\6\15\1\13\2\3\12\15"+
+    "\4\3\4\15\5\3\6\15\7\3\5\15\1\13\6\15"+
+    "\1\13\2\15\1\3\1\13\2\3\4\15\1\13\1\15"+
+    "\1\13\3\15\5\3\1\13\1\3\5\15\1\13\3\15"+
+    "\1\13\2\15\1\13\5\15\3\3\3\15\1\13\20\15"+
+    "\1\13\16\15\1\13\6\3\3\15\1\13\2\15\1\13"+
+    "\3\15\11\3\1\15\3\3\3\15\1\3\2\15\1\13"+
+    "\15\3\1\13\3\3\1\13\1\3\5\15\3\3\1\0"+
+    "\1\3\17\0\3\16\3\5\1\14\u01d9\0\1\14\u01da\3"+
+    "\16\0\1\12\2\0\2\12\1\0\10\12\1\0\2\12"+
     "\1\0\4\12\1\0\1\12\1\0\2\12\2\0\6\12"+
     "\2\0\1\12\1\0\1\12\1\0\11\12\3\0\3\12"+
     "\1\0\3\12\2\0\2\12\1\0\3\12\7\0\1\12"+
@@ -263,186 +387,128 @@ public final class UAX29URLEmailTokenizerImpl {
     "\2\0\1\12\6\0\1\12\1\0\2\12\1\0\5\12"+
     "\3\0\2\12\2\0\5\12\1\0\14\12\1\0\1\12"+
     "\2\0\3\12\5\0\2\12\1\0\2\12\2\0\2\12"+
-    "\26\0\3\12\3\0\1\12\2\0\2\4\10\0\1\12"+
-    "\373\2\1\0\3\2\374\0\2\4\43\0\2\13\2\14"+
-    "\2\13\1\14\1\13\1\14\1\13\7\14\2\13\1\14"+
-    "\1\13\3\15\1\14\1\2\1\12\1\2\1\12\2\2"+
-    "\2\14\1\2\26\14\3\2\5\14\3\2\4\14\2\2"+
-    "\7\14\2\12\4\14\1\2\6\14\1\12\2\14\1\12"+
-    "\4\14\5\2\1\12\2\2\3\14\1\12\13\14\1\12"+
-    "\12\14\6\2\1\14\3\2\13\14\5\2\1\12\4\2"+
-    "\2\14\3\2\1\12\14\2\1\14\4\2\1\12\4\2"+
-    "\1\14\1\12\2\2\1\12\1\2\2\14\3\2\11\14"+
-    "\1\2\2\14\1\2\1\14\1\2\3\14\1\12\4\14"+
-    "\1\12\2\14\2\2\2\14\1\2\1\14\1\2\1\12"+
-    "\1\14\5\2\2\14\4\2\1\12\1\14\3\12\21\14"+
-    "\1\12\6\14\1\2\7\14\1\12\5\14\2\12\4\2"+
-    "\12\14\4\2\4\14\1\12\6\14\1\12\2\2\12\14"+
-    "\4\2\4\14\5\2\6\14\7\2\5\14\1\12\6\14"+
-    "\1\12\2\14\1\2\1\12\2\2\4\14\1\12\1\14"+
-    "\1\12\3\14\5\2\1\12\1\2\5\14\1\12\3\14"+
-    "\1\12\2\14\1\12\5\14\3\2\3\14\1\12\20\14"+
-    "\1\12\16\14\1\12\6\2\3\14\1\12\2\14\1\12"+
-    "\3\14\11\2\1\14\3\2\3\14\1\2\2\14\1\12"+
-    "\2\2\1\0\13\2\1\12\3\2\1\12\1\2\5\14"+
-    "\3\2\1\0\1\2\2\0\1\13\5\14\1\13\1\14"+
-    "\1\0\1\12\1\0\1\12\2\0\2\14\1\0\26\14"+
-    "\3\0\5\14\3\0\4\14\2\0\7\14\2\12\4\14"+
-    "\1\0\6\14\1\12\2\14\1\12\4\14\5\0\1\12"+
-    "\2\0\3\14\1\12\13\14\1\12\12\14\6\0\1\14"+
-    "\3\0\13\14\5\0\1\12\4\0\2\14\3\0\1\12"+
-    "\14\0\1\14\4\0\1\12\4\0\1\14\1\12\2\0"+
-    "\1\12\1\0\2\14\3\0\11\14\1\0\2\14\1\0"+
-    "\1\14\1\0\3\14\1\12\4\14\1\12\2\14\2\0"+
-    "\2\14\1\0\1\14\1\0\1\12\1\14\5\0\2\14"+
-    "\4\0\1\12\1\14\3\12\21\14\1\12\6\14\1\0"+
-    "\7\14\1\12\5\14\2\12\4\0\12\14\4\0\4\14"+
-    "\1\12\6\14\1\12\2\0\12\14\4\0\4\14\5\0"+
-    "\6\14\7\0\5\14\1\12\6\14\1\12\2\14\1\0"+
-    "\1\12\2\0\4\14\1\12\1\14\1\12\3\14\5\0"+
-    "\1\12\1\0\5\14\1\12\3\14\1\12\2\14\1\12"+
-    "\5\14\3\0\3\14\1\12\20\14\1\12\16\14\1\12"+
-    "\6\0\3\14\1\12\2\14\1\12\3\14\11\0\1\14"+
-    "\3\0\3\14\1\0\2\14\1\12\15\0\1\12\3\0"+
-    "\1\12\1\0\5\14\2\0\3\4\15\0\3\15\1\13"+
-    "\u01c1\2\1\0\31\2\1\13\u01d8\0\3\4\16\0\1\11"+
-    "\2\0\2\11\1\0\10\11\1\0\2\11\1\0\4\11"+
-    "\1\0\1\11\1\0\2\11\2\0\6\11\2\0\1\11"+
-    "\1\0\1\11\1\0\11\11\3\0\3\11\1\0\3\11"+
-    "\2\0\2\11\1\0\3\11\7\0\1\11\10\0\1\11"+
-    "\4\0\1\11\1\0\2\11\1\0\2\11\2\0\1\11"+
-    "\1\0\1\11\1\0\3\11\3\0\1\11\1\0\1\11"+
-    "\1\0\1\11\1\0\1\11\2\0\1\11\2\0\2\11"+
-    "\1\0\3\11\1\0\5\11\1\0\1\11\1\0\1\11"+
-    "\3\0\4\11\1\0\6\11\4\0\1\11\2\0\1\11"+
-    "\6\0\1\11\1\0\2\11\1\0\5\11\3\0\2\11"+
-    "\2\0\5\11\1\0\14\11\1\0\1\11\2\0\3\11"+
-    "\5\0\2\11\1\0\2\11\2\0\2\11\26\0\3\11"+
-    "\3\0\1\11\1\0\1\13\1\14\1\13\32\14\2\15"+
-    "\1\0\2\15\1\0\2\15\1\0\1\15\1\2\2\14"+
-    "\24\2\1\12\14\2\1\12\11\2\2\14\2\12\10\2"+
-    "\3\14\1\12\1\14\7\2\1\12\1\2\2\14\21\2"+
-    "\1\12\24\2\1\12\1\14\5\2\2\14\12\2\1\12"+
-    "\4\2\1\14\3\2\1\14\1\2\1\14\15\2\1\12"+
-    "\2\14\5\2\1\14\6\2\1\14\13\2\2\14\1\12"+
-    "\2\2\4\14\1\2\1\12\20\2\4\14\5\2\1\12"+
-    "\3\2\1\12\1\2\2\14\4\2\1\12\1\2\1\14"+
-    "\5\2\1\12\4\2\1\14\3\2\1\14\1\12\11\2"+
-    "\1\12\4\2\1\12\7\2\1\14\3\2\2\12\1\14"+
-    "\2\2\1\14\12\2\1\14\5\2\2\14\1\2\3\14"+
-    "\1\2\1\12\1\14\4\2\1\12\2\2\1\12\5\2"+
-    "\1\14\3\2\1\12\1\2\1\12\20\2\1\12\6\2"+
-    "\1\14\1\12\1\2\1\12\1\2\1\14\21\2\1\0"+
-    "\4\2\1\12\4\2\1\14\7\2\1\0\1\2\2\0"+
-    "\32\14\1\0\2\14\24\0\1\12\14\0\1\12\11\0"+
-    "\2\14\2\12\10\0\3\14\1\12\1\14\7\0\1\12"+
-    "\1\0\2\14\21\0\1\12\24\0\1\12\1\14\5\0"+
-    "\2\14\12\0\1\12\4\0\1\14\3\0\1\14\1\0"+
-    "\1\14\15\0\1\12\2\14\5\0\1\14\6\0\1\14"+
-    "\13\0\2\14\1\12\2\0\4\14\1\0\1\12\20\0"+
-    "\4\14\5\0\1\12\3\0\1\12\1\0\2\14\4\0"+
-    "\1\12\1\0\1\14\5\0\1\12\4\0\1\14\3\0"+
-    "\1\14\1\12\11\0\1\12\4\0\1\12\7\0\1\14"+
-    "\3\0\2\12\1\14\2\0\1\14\12\0\1\14\5\0"+
-    "\2\14\1\0\3\14\1\0\1\12\1\14\4\0\1\12"+
-    "\2\0\1\12\5\0\1\14\3\0\1\12\1\0\1\12"+
-    "\20\0\1\12\6\0\1\14\1\12\1\0\1\12\1\0"+
-    "\1\14\25\0\1\12\4\0\1\14\10\0\2\4\20\0"+
-    "\u0155\2\1\0\20\2\u0165\0\2\4\15\0\1\11\4\0"+
-    "\1\11\1\0\1\11\60\0\2\11\10\0\1\11\2\0"+
-    "\1\11\11\0\1\11\5\0\1\11\13\0\1\11\42\0"+
-    "\1\11\11\0\1\11\21\0\1\11\5\0\1\11\2\0"+
-    "\1\11\26\0\1\11\4\0\1\11\7\0\1\11\14\0"+
-    "\1\11\1\0\3\11\20\0\1\11\15\0\1\11\5\0"+
-    "\2\11\24\0\1\11\4\0\1\11\52\0\1\11\6\0"+
-    "\1\11\3\0\1\11\5\0\1\11\1\0\1\11\10\0"+
-    "\1\11\6\0\1\11\3\0\1\11\2\0\1\11\13\0"+
-    "\1\11\16\0\1\11\16\0\1\11\11\0\1\11\2\0"+
-    "\1\11\24\0\1\11\16\0\1\11\3\0\1\11\7\0"+
-    "\1\15\3\0\22\2\1\14\7\2\1\14\6\2\1\14"+
-    "\2\2\1\12\3\2\1\14\14\2\1\14\1\12\10\2"+
-    "\1\14\7\2\1\12\17\2\1\12\4\2\1\14\22\2"+
-    "\1\14\17\2\1\14\1\2\1\14\6\2\1\14\1\2"+
-    "\2\14\6\2\1\14\16\2\2\14\16\2\1\14\5\2"+
-    "\1\14\4\2\1\14\17\2\1\14\10\2\41\0\1\12"+
-    "\1\14\7\2\33\0\1\14\7\0\1\14\6\0\1\14"+
-    "\2\0\1\12\3\0\1\14\14\0\1\14\1\12\10\0"+
-    "\1\14\7\0\1\12\17\0\1\12\4\0\1\14\22\0"+
-    "\1\14\17\0\1\14\1\0\1\14\6\0\1\14\1\0"+
-    "\2\14\6\0\1\14\16\0\2\14\16\0\1\14\5\0"+
-    "\1\14\4\0\1\14\17\0\1\14\10\0\1\12\1\14"+
-    "\7\0\3\12\20\0\312\2\41\0\11\2\323\0\3\4"+
-    "\42\0\1\11\11\0\1\11\12\0\2\11\12\0\1\11"+
-    "\10\0\1\11\22\0\1\11\23\0\1\11\22\0\1\11"+
-    "\23\0\1\11\31\0\1\11\7\0\1\11\26\0\1\11"+
-    "\3\0\1\11\6\0\1\11\7\0\1\11\11\0\1\11"+
-    "\11\0\1\11\4\0\1\11\13\0\2\11\27\0\1\11"+
-    "\5\0\1\11\2\0\1\11\10\0\1\11\1\0\1\11"+
-    "\20\0\1\11\5\0\1\11\1\0\1\11\24\0\1\11"+
-    "\13\0\1\15\24\2\1\14\10\2\1\14\4\2\1\12"+
-    "\5\2\1\14\1\2\1\12\3\2\1\14\22\2\1\12"+
-    "\17\2\1\12\2\2\1\12\34\2\142\0\4\2\1\0"+
-    "\1\15\6\0\1\2\3\0\1\15\6\0\1\15\25\0"+
-    "\1\14\10\0\1\14\4\0\1\12\5\0\1\14\1\0"+
-    "\1\12\3\0\1\14\22\0\1\12\17\0\1\12\2\0"+
-    "\1\12\37\0\1\13\1\14\1\13\1\14\2\13\4\14"+
-    "\14\0\151\2\142\0\4\2\154\0\1\13\2\4\56\0"+
-    "\1\11\20\0\1\11\20\0\1\11\16\0\1\11\227\0"+
-    "\1\11\10\0\1\15\23\2\2\14\1\2\1\14\11\2"+
-    "\1\12\2\2\2\14\1\12\4\2\1\14\3\2\1\12"+
-    "\11\2\142\0\2\2\5\0\1\15\10\0\2\2\42\0"+
-    "\2\14\1\0\1\14\11\0\1\12\2\0\2\14\1\12"+
-    "\4\0\1\14\3\0\1\12\13\0\1\14\14\0\62\2"+
-    "\142\0\2\2\137\0\1\11\7\0\1\11\24\0\1\11"+
-    "\17\0\1\11\2\0\1\11\176\0\1\15\20\2\1\14"+
-    "\5\2\160\0\2\2\1\15\54\0\1\14\23\0\24\2"+
-    "\157\0\2\2\76\0\1\11\3\0\1\11\7\0\1\11"+
-    "\155\0\6\2\1\12\10\2\63\0\1\12\42\0\1\2"+
-    "\1\15\7\0\1\15\2\0\1\16\37\0\1\12\24\0"+
-    "\16\2\126\0\1\2\237\0\12\2\41\0\1\14\22\0"+
-    "\1\2\1\15\130\0\12\2\63\0\1\2\31\0\1\11"+
-    "\73\0\1\11\43\0\5\2\5\0\1\12\14\0\1\12"+
-    "\26\0\1\15\16\0\1\16\2\0\2\16\1\0\10\16"+
-    "\1\0\2\16\1\0\4\16\1\0\1\16\1\0\2\16"+
-    "\2\0\6\16\2\0\1\16\1\0\1\16\1\0\11\16"+
-    "\3\0\3\16\1\0\3\16\2\0\2\16\1\0\3\16"+
-    "\7\0\1\16\10\0\1\16\4\0\1\16\1\0\2\16"+
-    "\1\0\2\16\2\0\1\16\1\0\1\16\1\0\3\16"+
-    "\3\0\1\16\1\0\1\16\1\0\1\16\1\0\1\16"+
-    "\2\0\1\16\2\0\2\16\1\0\3\16\1\0\5\16"+
-    "\1\0\1\16\1\0\1\16\3\0\4\16\1\0\6\16"+
-    "\4\0\1\16\2\0\1\16\6\0\1\16\1\0\2\16"+
-    "\1\0\5\16\3\0\2\16\2\0\5\16\1\0\14\16"+
-    "\1\0\1\16\2\0\3\16\5\0\2\16\1\0\2\16"+
-    "\2\0\2\16\26\0\3\16\3\0\1\16\54\0\4\2"+
-    "\164\0\1\2\14\0\1\14\20\0\1\15\15\0\1\16"+
-    "\4\0\1\16\1\0\1\16\60\0\2\16\10\0\1\16"+
-    "\2\0\1\16\11\0\1\16\5\0\1\16\13\0\1\16"+
-    "\42\0\1\16\11\0\1\16\21\0\1\16\5\0\1\16"+
-    "\2\0\1\16\26\0\1\16\4\0\1\16\7\0\1\16"+
-    "\14\0\1\16\1\0\3\16\20\0\1\16\15\0\1\16"+
-    "\5\0\2\16\24\0\1\16\4\0\1\16\52\0\1\16"+
-    "\6\0\1\16\3\0\1\16\5\0\1\16\1\0\1\16"+
-    "\10\0\1\16\6\0\1\16\3\0\1\16\2\0\1\16"+
-    "\13\0\1\16\16\0\1\16\16\0\1\16\11\0\1\16"+
-    "\2\0\1\16\24\0\1\16\16\0\1\16\3\0\1\16"+
-    "\57\0\1\2\61\0\1\11\14\0\1\11\26\0\1\2"+
-    "\62\0\1\16\11\0\1\16\12\0\2\16\12\0\1\16"+
-    "\10\0\1\16\22\0\1\16\23\0\1\16\22\0\1\16"+
-    "\23\0\1\16\31\0\1\16\7\0\1\16\26\0\1\16"+
-    "\3\0\1\16\6\0\1\16\7\0\1\16\11\0\1\16"+
-    "\11\0\1\16\4\0\1\16\13\0\2\16\27\0\1\16"+
-    "\5\0\1\16\2\0\1\16\10\0\1\16\1\0\1\16"+
-    "\20\0\1\16\5\0\1\16\1\0\1\16\24\0\1\16"+
-    "\13\0\1\15\42\0\1\2\67\0\1\2\70\0\1\16"+
-    "\20\0\1\16\20\0\1\16\16\0\1\16\227\0\1\16"+
-    "\45\0\1\2\41\0\1\2\61\0\1\16\7\0\1\16"+
-    "\24\0\1\16\17\0\1\16\2\0\1\16\325\0\1\16"+
-    "\3\0\1\16\7\0\1\16\u0144\0\1\16\73\0\1\16"+
-    "\273\0\1\16\14\0\1\16\224\0";
+    "\26\0\3\12\3\0\1\12\1\0\1\5\1\0\1\5"+
+    "\1\16\1\0\1\16\2\14\32\15\2\16\1\0\2\16"+
+    "\1\0\1\16\1\0\2\15\24\0\1\13\14\0\1\13"+
+    "\11\0\2\15\2\13\10\0\3\15\1\13\1\15\7\0"+
+    "\1\13\1\0\2\15\21\0\1\13\24\0\1\13\1\15"+
+    "\5\0\2\15\12\0\1\13\4\0\1\15\3\0\1\15"+
+    "\1\0\1\15\15\0\1\13\2\15\5\0\1\15\6\0"+
+    "\1\15\13\0\2\15\1\13\2\0\4\15\1\0\1\13"+
+    "\20\0\4\15\5\0\1\13\3\0\1\13\1\0\2\15"+
+    "\4\0\1\13\1\0\1\15\5\0\1\13\4\0\1\15"+
+    "\3\0\1\15\1\13\11\0\1\13\4\0\1\13\7\0"+
+    "\1\15\3\0\2\13\1\15\2\0\1\15\12\0\1\15"+
+    "\5\0\2\15\1\0\3\15\1\0\1\13\1\15\4\0"+
+    "\1\13\2\0\1\13\5\0\1\15\3\0\1\13\1\0"+
+    "\1\13\20\0\1\13\6\0\1\15\1\13\1\0\1\13"+
+    "\1\0\1\15\26\0\1\13\4\0\1\15\7\0\33\15"+
+    "\1\3\2\15\24\3\1\13\14\3\1\13\11\3\2\15"+
+    "\2\13\10\3\3\15\1\13\1\15\7\3\1\13\1\3"+
+    "\2\15\21\3\1\13\24\3\1\13\1\15\5\3\2\15"+
+    "\12\3\1\13\4\3\1\15\3\3\1\15\1\3\1\15"+
+    "\15\3\1\13\2\15\5\3\1\15\6\3\1\15\13\3"+
+    "\2\15\1\13\2\3\4\15\1\3\1\13\20\3\4\15"+
+    "\5\3\1\13\3\3\1\13\1\3\2\15\4\3\1\13"+
+    "\1\3\1\15\5\3\1\13\4\3\1\15\3\3\1\15"+
+    "\1\13\11\3\1\13\4\3\1\13\7\3\1\15\3\3"+
+    "\2\13\1\15\2\3\1\15\12\3\1\15\5\3\2\15"+
+    "\1\3\3\15\1\3\1\13\1\15\4\3\1\13\2\3"+
+    "\1\13\5\3\1\15\3\3\1\13\1\3\1\13\20\3"+
+    "\1\13\6\3\1\15\1\13\1\3\1\13\1\3\1\15"+
+    "\25\3\1\13\4\3\1\15\7\3\1\0\1\3\22\0"+
+    "\1\5\1\0\1\5\u0165\0\u0165\3\15\0\1\12\4\0"+
+    "\1\12\1\0\1\12\60\0\2\12\10\0\1\12\2\0"+
+    "\1\12\11\0\1\12\5\0\1\12\13\0\1\12\42\0"+
+    "\1\12\11\0\1\12\21\0\1\12\5\0\1\12\2\0"+
+    "\1\12\26\0\1\12\4\0\1\12\7\0\1\12\14\0"+
+    "\1\12\1\0\3\12\20\0\1\12\15\0\1\12\5\0"+
+    "\2\12\24\0\1\12\4\0\1\12\52\0\1\12\6\0"+
+    "\1\12\3\0\1\12\5\0\1\12\1\0\1\12\10\0"+
+    "\1\12\6\0\1\12\3\0\1\12\2\0\1\12\13\0"+
+    "\1\12\16\0\1\12\16\0\1\12\11\0\1\12\2\0"+
+    "\1\12\24\0\1\12\16\0\1\12\3\0\1\12\7\0"+
+    "\3\13\1\0\1\16\24\0\1\15\7\0\1\15\6\0"+
+    "\1\15\2\0\1\13\3\0\1\15\14\0\1\15\1\13"+
+    "\10\0\1\15\7\0\1\13\17\0\1\13\4\0\1\15"+
+    "\22\0\1\15\17\0\1\15\1\0\1\15\6\0\1\15"+
+    "\1\0\2\15\6\0\1\15\16\0\2\15\16\0\1\15"+
+    "\5\0\1\15\4\0\1\15\17\0\1\15\51\0\1\13"+
+    "\1\15\7\0\22\3\1\15\7\3\1\15\6\3\1\15"+
+    "\2\3\1\13\3\3\1\15\14\3\1\15\1\13\10\3"+
+    "\1\15\7\3\1\13\17\3\1\13\4\3\1\15\22\3"+
+    "\1\15\17\3\1\15\1\3\1\15\6\3\1\15\1\3"+
+    "\2\15\6\3\1\15\16\3\2\15\16\3\1\15\5\3"+
+    "\1\15\4\3\1\15\17\3\1\15\10\3\1\13\1\15"+
+    "\7\3\31\0\3\5\364\0\323\3\42\0\1\12\11\0"+
+    "\1\12\12\0\2\12\12\0\1\12\10\0\1\12\22\0"+
+    "\1\12\23\0\1\12\22\0\1\12\23\0\1\12\31\0"+
+    "\1\12\7\0\1\12\26\0\1\12\3\0\1\12\6\0"+
+    "\1\12\7\0\1\12\11\0\1\12\11\0\1\12\4\0"+
+    "\1\12\13\0\2\12\27\0\1\12\5\0\1\12\2\0"+
+    "\1\12\10\0\1\12\1\0\1\12\20\0\1\12\5\0"+
+    "\1\12\1\0\1\12\24\0\1\12\13\0\1\14\2\15"+
+    "\4\14\4\15\1\16\24\0\1\15\10\0\1\15\4\0"+
+    "\1\13\5\0\1\15\1\0\1\13\3\0\1\15\22\0"+
+    "\1\13\17\0\1\13\2\0\1\13\201\0\24\3\1\15"+
+    "\10\3\1\15\4\3\1\13\5\3\1\15\1\3\1\13"+
+    "\3\3\1\15\22\3\1\13\17\3\1\13\2\3\1\13"+
+    "\40\3\1\16\7\0\1\3\3\0\1\16\6\0\1\16"+
+    "\15\0\1\5\1\14\1\5\316\0\155\3\56\0\1\12"+
+    "\20\0\1\12\20\0\1\12\16\0\1\12\227\0\1\12"+
+    "\10\0\1\15\1\16\23\0\2\15\1\0\1\15\11\0"+
+    "\1\13\2\0\2\15\1\13\4\0\1\15\3\0\1\13"+
+    "\155\0\23\3\2\15\1\3\1\15\11\3\1\13\2\3"+
+    "\2\15\1\13\4\3\1\15\3\3\1\13\13\3\5\0"+
+    "\1\16\6\0\1\3\2\0\1\3\261\0\64\3\53\0"+
+    "\1\12\7\0\1\12\24\0\1\12\17\0\1\12\2\0"+
+    "\1\12\176\0\1\16\20\0\1\15\167\0\20\3\1\15"+
+    "\7\3\1\16\255\0\26\3\50\0\1\12\3\0\1\12"+
+    "\7\0\1\12\163\0\1\13\76\0\1\13\40\0\6\3"+
+    "\1\13\11\3\1\16\7\0\1\16\2\0\1\17\211\0"+
+    "\17\3\275\0\1\15\21\0\13\3\1\16\213\0\13\3"+
+    "\16\0\1\12\76\0\1\12\54\0\1\13\14\0\1\13"+
+    "\24\0\5\3\1\16\16\0\1\17\2\0\2\17\1\0"+
+    "\10\17\1\0\2\17\1\0\4\17\1\0\1\17\1\0"+
+    "\2\17\2\0\6\17\2\0\1\17\1\0\1\17\1\0"+
+    "\11\17\3\0\3\17\1\0\3\17\2\0\2\17\1\0"+
+    "\3\17\7\0\1\17\10\0\1\17\4\0\1\17\1\0"+
+    "\2\17\1\0\2\17\2\0\1\17\1\0\1\17\1\0"+
+    "\3\17\3\0\1\17\1\0\1\17\1\0\1\17\1\0"+
+    "\1\17\2\0\1\17\2\0\2\17\1\0\3\17\1\0"+
+    "\5\17\1\0\1\17\1\0\1\17\3\0\4\17\1\0"+
+    "\6\17\4\0\1\17\2\0\1\17\6\0\1\17\1\0"+
+    "\2\17\1\0\5\17\3\0\2\17\2\0\5\17\1\0"+
+    "\14\17\1\0\1\17\2\0\3\17\5\0\2\17\1\0"+
+    "\2\17\2\0\2\17\26\0\3\17\3\0\1\17\124\0"+
+    "\4\3\125\0\1\15\17\0\1\3\1\16\15\0\1\17"+
+    "\4\0\1\17\1\0\1\17\60\0\2\17\10\0\1\17"+
+    "\2\0\1\17\11\0\1\17\5\0\1\17\13\0\1\17"+
+    "\42\0\1\17\11\0\1\17\21\0\1\17\5\0\1\17"+
+    "\2\0\1\17\26\0\1\17\4\0\1\17\7\0\1\17"+
+    "\14\0\1\17\1\0\3\17\20\0\1\17\15\0\1\17"+
+    "\5\0\2\17\24\0\1\17\4\0\1\17\52\0\1\17"+
+    "\6\0\1\17\3\0\1\17\5\0\1\17\1\0\1\17"+
+    "\10\0\1\17\6\0\1\17\3\0\1\17\2\0\1\17"+
+    "\13\0\1\17\16\0\1\17\16\0\1\17\11\0\1\17"+
+    "\2\0\1\17\24\0\1\17\16\0\1\17\3\0\1\17"+
+    "\112\0\1\3\27\0\1\12\14\0\1\12\46\0\1\3"+
+    "\41\0\1\17\11\0\1\17\12\0\2\17\12\0\1\17"+
+    "\10\0\1\17\22\0\1\17\23\0\1\17\22\0\1\17"+
+    "\23\0\1\17\31\0\1\17\7\0\1\17\26\0\1\17"+
+    "\3\0\1\17\6\0\1\17\7\0\1\17\11\0\1\17"+
+    "\11\0\1\17\4\0\1\17\13\0\2\17\27\0\1\17"+
+    "\5\0\1\17\2\0\1\17\10\0\1\17\1\0\1\17"+
+    "\20\0\1\17\5\0\1\17\1\0\1\17\24\0\1\17"+
+    "\13\0\1\16\63\0\1\3\60\0\1\3\56\0\1\17"+
+    "\20\0\1\17\20\0\1\17\16\0\1\17\227\0\1\17"+
+    "\57\0\1\3\35\0\1\3\53\0\1\17\7\0\1\17"+
+    "\24\0\1\17\17\0\1\17\2\0\1\17\324\0\1\17"+
+    "\3\0\1\17\7\0\1\17\u0144\0\1\17\76\0\1\17"+
+    "\272\0\1\17\14\0\1\17\222\0";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[12851];
+    int [] result = new int[12892];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -467,1616 +533,1621 @@ public final class UAX29URLEmailTokenizerImpl {
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\124\0\250\0\374\0\u0150\0\u01a4\0\u01f8\0\u024c"+
-    "\0\u02a0\0\u02f4\0\u0348\0\u039c\0\u03f0\0\u0444\0\u0498\0\u04ec"+
-    "\0\u0540\0\u0594\0\u05e8\0\u063c\0\u0690\0\u06e4\0\u0738\0\u078c"+
-    "\0\u07e0\0\u0834\0\u0888\0\u08dc\0\u0930\0\u0984\0\u09d8\0\u0a2c"+
-    "\0\u0a80\0\u0ad4\0\u0b28\0\u0b7c\0\u0bd0\0\u024c\0\u0c24\0\u0348"+
-    "\0\u0c78\0\u0ccc\0\u039c\0\u0d20\0\u0d74\0\u0dc8\0\u0e1c\0\u0444"+
-    "\0\u0e70\0\u0ec4\0\u0f18\0\u0f6c\0\u0fc0\0\u1014\0\u1068\0\u10bc"+
-    "\0\u1110\0\u1164\0\u11b8\0\u120c\0\u1260\0\u12b4\0\u1308\0\u135c"+
-    "\0\u13b0\0\u0738\0\u1404\0\u1458\0\u14ac\0\u1500\0\u1554\0\u15a8"+
-    "\0\u15fc\0\u1650\0\u16a4\0\u16f8\0\u174c\0\u17a0\0\u17f4\0\u1848"+
-    "\0\u189c\0\u18f0\0\u1944\0\u1998\0\u19ec\0\u1a40\0\u1a94\0\u1ae8"+
-    "\0\u1b3c\0\u1b90\0\u1be4\0\u1c38\0\u1c8c\0\u1ce0\0\u1d34\0\u1d88"+
-    "\0\u1ddc\0\u1e30\0\u1e84\0\u1ed8\0\u1f2c\0\u1f80\0\u1fd4\0\u2028"+
-    "\0\u207c\0\u20d0\0\u2124\0\u2178\0\u21cc\0\u2220\0\u2274\0\u22c8"+
-    "\0\u231c\0\u2370\0\u23c4\0\u2418\0\u246c\0\u24c0\0\u2514\0\u2568"+
-    "\0\u25bc\0\u2610\0\u2664\0\u26b8\0\u270c\0\u2760\0\u27b4\0\u2808"+
-    "\0\u285c\0\u28b0\0\u2904\0\u2958\0\u29ac\0\u2a00\0\u2a54\0\u2aa8"+
-    "\0\u2afc\0\u2b50\0\u2ba4\0\u2bf8\0\u2c4c\0\u2ca0\0\u2cf4\0\u2d48"+
-    "\0\u2d9c\0\u2df0\0\u2e44\0\u2e98\0\u2eec\0\u2f40\0\u2f94\0\u2fe8"+
-    "\0\u303c\0\u3090\0\u30e4\0\u3138\0\u318c\0\u31e0\0\u3234\0\u3288"+
-    "\0\u32dc\0\u3330\0\u3384\0\u33d8\0\u342c\0\u3480\0\u34d4\0\u3528"+
-    "\0\u357c\0\u35d0\0\u3624\0\u3678\0\u36cc\0\u3720\0\u3774\0\u37c8"+
-    "\0\u381c\0\u3870\0\u38c4\0\u3918\0\u396c\0\u39c0\0\u3a14\0\u3a68"+
-    "\0\u3abc\0\u3b10\0\u3b64\0\u3bb8\0\u3c0c\0\u3c60\0\u3cb4\0\u3d08"+
-    "\0\u3d5c\0\u3db0\0\u3e04\0\u3e58\0\u3eac\0\u3f00\0\u3f54\0\u3fa8"+
-    "\0\u3ffc\0\u4050\0\u40a4\0\u40f8\0\u414c\0\u41a0\0\u41f4\0\u4248"+
-    "\0\u429c\0\u42f0\0\u4344\0\u4398\0\u43ec\0\u4440\0\u4494\0\u44e8"+
-    "\0\u453c\0\u4590\0\u45e4\0\u4638\0\250\0\u468c\0\u46e0\0\u4734"+
-    "\0\u4788\0\u47dc\0\u4830\0\u4884\0\u48d8\0\u492c\0\u4980\0\u49d4"+
-    "\0\u4a28\0\u4a7c\0\u4ad0\0\u4b24\0\u4b78\0\u4bcc\0\u4c20\0\u4c74"+
-    "\0\u4cc8\0\u4d1c\0\u4d70\0\u4dc4\0\u4e18\0\u4e6c\0\u4ec0\0\u4f14"+
-    "\0\u4f68\0\u4fbc\0\u5010\0\u5064\0\u50b8\0\u510c\0\u5160\0\u51b4"+
-    "\0\u5208\0\u525c\0\u52b0\0\u5304\0\u5358\0\u53ac\0\u5400\0\u5454"+
-    "\0\u54a8\0\u54fc\0\u5550\0\u55a4\0\u55f8\0\u564c\0\u56a0\0\u56f4"+
-    "\0\u5748\0\u579c\0\u57f0\0\u5844\0\u5898\0\u58ec\0\u5940\0\u5994"+
-    "\0\u59e8\0\u5a3c\0\u5a90\0\u5ae4\0\u5b38\0\u5b8c\0\u5be0\0\u5c34"+
-    "\0\u5c88\0\u5cdc\0\u5d30\0\u5d84\0\u5dd8\0\u5e2c\0\u5e80\0\u5ed4"+
-    "\0\u5f28\0\u5f7c\0\u5fd0\0\u6024\0\u6078\0\u60cc\0\u6120\0\u6174"+
-    "\0\u61c8\0\u621c\0\u6270\0\u62c4\0\u6318\0\u636c\0\u63c0\0\u6414"+
-    "\0\u6468\0\u64bc\0\u6510\0\u6564\0\u65b8\0\u660c\0\u6660\0\u66b4"+
-    "\0\u6708\0\u675c\0\u67b0\0\u6804\0\u6858\0\u68ac\0\u6900\0\u6954"+
-    "\0\u69a8\0\u69fc\0\u6a50\0\u6aa4\0\u6af8\0\u6b4c\0\u6ba0\0\u6bf4"+
-    "\0\u6c48\0\u6c9c\0\u6cf0\0\u6d44\0\u6d98\0\u6dec\0\u6e40\0\u6e94"+
-    "\0\u6ee8\0\u6f3c\0\u6f90\0\u6fe4\0\u7038\0\u708c\0\u70e0\0\u7134"+
-    "\0\u7188\0\u71dc\0\u7230\0\u7284\0\u72d8\0\u732c\0\u7380\0\u73d4"+
-    "\0\u7428\0\u747c\0\u74d0\0\u7524\0\u7578\0\u75cc\0\u7620\0\u7674"+
-    "\0\u76c8\0\u771c\0\u7770\0\u77c4\0\u7818\0\u786c\0\u78c0\0\u7914"+
-    "\0\u7968\0\u79bc\0\u7a10\0\u7a64\0\u7ab8\0\u7b0c\0\u7b60\0\u7bb4"+
-    "\0\u7c08\0\u7c5c\0\u7cb0\0\u7d04\0\u7d58\0\u7dac\0\u7e00\0\u7e54"+
-    "\0\u7ea8\0\u7efc\0\u7f50\0\u7fa4\0\u7ff8\0\u804c\0\u80a0\0\u80f4"+
-    "\0\u8148\0\u819c\0\u81f0\0\u8244\0\u8298\0\u82ec\0\u8340\0\u8394"+
-    "\0\u83e8\0\u843c\0\u8490\0\u84e4\0\u8538\0\u858c\0\u85e0\0\u8634"+
-    "\0\u8688\0\u86dc\0\u8730\0\u8784\0\u87d8\0\u882c\0\u8880\0\u88d4"+
-    "\0\u8928\0\u897c\0\u89d0\0\u8a24\0\u8a78\0\u8acc\0\u8b20\0\u8b74"+
-    "\0\u8bc8\0\u8c1c\0\u8c70\0\u8cc4\0\u8d18\0\u8d6c\0\u8dc0\0\u8e14"+
-    "\0\u8e68\0\u8ebc\0\u8f10\0\u8f64\0\u8fb8\0\u900c\0\u9060\0\u90b4"+
-    "\0\u9108\0\u915c\0\u91b0\0\u9204\0\u9258\0\u92ac\0\u9300\0\u9354"+
-    "\0\u93a8\0\u93fc\0\u9450\0\u94a4\0\u94f8\0\u954c\0\u95a0\0\u95f4"+
-    "\0\u9648\0\u969c\0\u96f0\0\u9744\0\u9798\0\u97ec\0\u9840\0\u9894"+
-    "\0\u98e8\0\u993c\0\u9990\0\u99e4\0\u9a38\0\u9a8c\0\u9ae0\0\u9b34"+
-    "\0\u9b88\0\u9bdc\0\u9c30\0\u9c84\0\u9cd8\0\u9d2c\0\u9d80\0\u9dd4"+
-    "\0\u9e28\0\u9e7c\0\u9ed0\0\u9f24\0\u9f78\0\u9fcc\0\ua020\0\ua074"+
-    "\0\ua0c8\0\ua11c\0\ua170\0\ua1c4\0\ua218\0\ua26c\0\ua2c0\0\ua314"+
-    "\0\ua368\0\ua3bc\0\ua410\0\ua464\0\ua4b8\0\ua50c\0\ua560\0\ua5b4"+
-    "\0\ua608\0\ua65c\0\ua6b0\0\ua704\0\ua758\0\ua7ac\0\ua800\0\ua854"+
-    "\0\ua8a8\0\ua8fc\0\ua950\0\ua9a4\0\ua9f8\0\uaa4c\0\uaaa0\0\uaaf4"+
-    "\0\uab48\0\uab9c\0\uabf0\0\uac44\0\uac98\0\uacec\0\uad40\0\uad94"+
-    "\0\uade8\0\uae3c\0\uae90\0\uaee4\0\uaf38\0\uaf8c\0\uafe0\0\ub034"+
-    "\0\ub088\0\ub0dc\0\ub130\0\ub184\0\ub1d8\0\ub22c\0\ub280\0\ub2d4"+
-    "\0\ub328\0\ub37c\0\ub3d0\0\ub424\0\ub478\0\ub4cc\0\ub520\0\ub574"+
-    "\0\ub5c8\0\ub61c\0\ub670\0\ub6c4\0\ub718\0\ub76c\0\ub7c0\0\ub814"+
-    "\0\ub868\0\ub8bc\0\ub910\0\ub964\0\ub9b8\0\uba0c\0\uba60\0\ubab4"+
-    "\0\ubb08\0\ubb5c\0\ubbb0\0\ubc04\0\ubc58\0\ubcac\0\ubd00\0\ubd54"+
-    "\0\ubda8\0\ubdfc\0\ube50\0\ubea4\0\ubef8\0\ubf4c\0\ubfa0\0\ubff4"+
-    "\0\uc048\0\uc09c\0\uc0f0\0\uc144\0\uc198\0\uc1ec\0\uc240\0\uc294"+
-    "\0\uc2e8\0\uc33c\0\uc390\0\uc3e4\0\uc438\0\uc48c\0\uc4e0\0\uc534"+
-    "\0\uc588\0\uc5dc\0\uc630\0\uc684\0\uc6d8\0\uc72c\0\uc780\0\uc7d4"+
-    "\0\uc828\0\uc87c\0\uc8d0\0\uc924\0\uc978\0\uc9cc\0\uca20\0\uca74"+
-    "\0\ucac8\0\ucb1c\0\ucb70\0\ucbc4\0\ucc18\0\ucc6c\0\uccc0\0\ucd14"+
-    "\0\ucd68\0\ucdbc\0\uce10\0\uce64\0\uceb8\0\ucf0c\0\ucf60\0\ucfb4"+
-    "\0\ud008\0\ud05c\0\ud0b0\0\ud104\0\ud158\0\ud1ac\0\ud200\0\ud254"+
-    "\0\ud2a8\0\ud2fc\0\ud350\0\ud3a4\0\ud3f8\0\ud44c\0\ud4a0\0\ud4f4"+
-    "\0\ud548\0\ud59c\0\ud5f0\0\ud644\0\ud698\0\ud6ec\0\ud740\0\ud794"+
-    "\0\ud7e8\0\ud83c\0\ud890\0\ud8e4\0\ud938\0\ud98c\0\ud9e0\0\uda34"+
-    "\0\uda88\0\udadc\0\udb30\0\udb84\0\udbd8\0\udc2c\0\udc80\0\udcd4"+
-    "\0\udd28\0\udd7c\0\uddd0\0\ude24\0\ude78\0\udecc\0\udf20\0\udf74"+
-    "\0\udfc8\0\ue01c\0\ue070\0\ue0c4\0\ue118\0\ue16c\0\ue1c0\0\ue214"+
-    "\0\ue268\0\ue2bc\0\ue310\0\ue364\0\ue3b8\0\ue40c\0\ue460\0\ue4b4"+
-    "\0\ue508\0\ue55c\0\ue5b0\0\ue604\0\ue658\0\ue6ac\0\ue700\0\ue754"+
-    "\0\ue7a8\0\ue7fc\0\ue850\0\ue8a4\0\ue8f8\0\ue94c\0\ue9a0\0\ue9f4"+
-    "\0\uea48\0\uea9c\0\ueaf0\0\ueb44\0\ueb98\0\uebec\0\uec40\0\uec94"+
-    "\0\uece8\0\ued3c\0\ued90\0\uede4\0\uee38\0\uee8c\0\ueee0\0\uef34"+
-    "\0\uef88\0\uefdc\0\uf030\0\uf084\0\uf0d8\0\uf12c\0\uf180\0\uf1d4"+
-    "\0\uf228\0\uf27c\0\uf2d0\0\uf324\0\uf378\0\uf3cc\0\uf420\0\uf474"+
-    "\0\uf4c8\0\uf51c\0\uf570\0\uf5c4\0\uf618\0\uf66c\0\uf6c0\0\uf714"+
-    "\0\uf768\0\uf7bc\0\uf810\0\uf864\0\uf8b8\0\uf90c\0\uf960\0\uf9b4"+
-    "\0\ufa08\0\ufa5c\0\ufab0\0\ufb04\0\ufb58\0\ufbac\0\ufc00\0\ufc54"+
-    "\0\ufca8\0\ufcfc\0\ufd50\0\ufda4\0\ufdf8\0\ufe4c\0\ufea0\0\ufef4"+
-    "\0\uff48\0\uff9c\0\ufff0\1\104\1\230\1\354\1\u0140\1\u0194"+
-    "\1\u01e8\1\u023c\1\u0290\1\u02e4\1\u0338\1\u038c\1\u03e0\1\u0434"+
-    "\1\u0488\1\u04dc\1\u0530\1\u0584\1\u05d8\1\u062c\1\u0680\1\u06d4"+
-    "\1\u0728\1\u077c\1\u07d0\1\u0824\1\u0878\1\u08cc\1\u0920\1\u0974"+
-    "\1\u09c8\1\u0a1c\1\u0a70\1\u0ac4\1\u0b18\1\u0b6c\1\u0bc0\1\u0c14"+
-    "\1\u0c68\1\u0cbc\1\u0d10\1\u0d64\1\u0db8\1\u0e0c\1\u0e60\1\u0eb4"+
-    "\1\u0f08\1\u0f5c\1\u0fb0\1\u1004\1\u1058\1\u10ac\1\u1100\1\u1154"+
-    "\1\u11a8\1\u11fc\1\u1250\1\u12a4\1\u12f8\1\u134c\1\u13a0\1\u13f4"+
-    "\1\u1448\1\u149c\1\u14f0\1\u1544\1\u1598\1\u15ec\1\u1640\1\u1694"+
-    "\1\u16e8\1\u173c\1\u1790\1\u17e4\1\u1838\1\u188c\1\u18e0\1\u1934"+
-    "\1\u1988\1\u19dc\1\u1a30\1\u1a84\1\u1ad8\1\u1b2c\1\u1b80\1\u1bd4"+
-    "\1\u1c28\1\u1c7c\1\u1cd0\1\u1d24\1\u1d78\1\u1dcc\1\u1e20\1\u1e74"+
-    "\1\u1ec8\1\u1f1c\1\u1f70\1\u1fc4\1\u2018\1\u206c\1\u20c0\1\u2114"+
-    "\1\u2168\1\u21bc\1\u2210\1\u2264\1\u22b8\1\u230c\1\u2360\1\u23b4"+
-    "\1\u2408\1\u245c\1\u24b0\1\u2504\1\u2558\1\u25ac\1\u2600\1\u2654"+
-    "\1\u26a8\1\u26fc\1\u2750\1\u27a4\1\u27f8\1\u284c\1\u28a0\1\u28f4"+
-    "\1\u2948\1\u299c\1\u29f0\1\u2a44\1\u2a98\1\u2aec\1\u2b40\1\u2b94"+
-    "\1\u2be8\1\u2c3c\1\u2c90\1\u2ce4\1\u2d38\1\u2d8c\1\u2de0\1\u2e34"+
-    "\1\u2e88\1\u2edc\1\u2f30\1\u2f84\1\u2fd8\1\u302c\1\u3080\1\u30d4"+
-    "\1\u3128\1\u317c\1\u31d0\1\u3224\1\u3278\1\u32cc\1\u3320\1\u3374"+
-    "\1\u33c8\1\u341c\1\u3470\1\u34c4\1\u3518\1\u356c\1\u35c0\1\u3614"+
-    "\1\u3668\1\u36bc\1\u3710\1\u3764\1\u37b8\1\u380c\1\u3860\1\u38b4"+
-    "\1\u3908\1\u395c\1\u39b0\1\u3a04\1\u3a58\1\u3aac\1\u3b00\1\u3b54"+
-    "\1\u3ba8\1\u3bfc\1\u3c50\1\u3ca4\1\u3cf8\1\u3d4c\1\u3da0\1\u3df4"+
-    "\1\u3e48\1\u3e9c\1\u3ef0\1\u3f44\1\u3f98\1\u3fec\1\u4040\1\u4094"+
-    "\1\u40e8\1\u413c\1\u4190\1\u41e4\1\u4238\1\u428c\1\u42e0\1\u4334"+
-    "\1\u4388\1\u43dc\1\u4430\1\u4484\1\u44d8\1\u452c\1\u4580\1\u45d4"+
-    "\1\u4628\1\u467c\1\u46d0\1\u4724\1\u4778\1\u47cc\1\u4820\1\u4874"+
-    "\1\u48c8\1\u491c\1\u4970\1\u49c4\1\u4a18\1\u4a6c\1\u4ac0\1\u4b14"+
-    "\1\u4b68\1\u4bbc\1\u4c10\1\u4c64\1\u4cb8\1\u4d0c\1\u4d60\1\u4db4"+
-    "\1\u4e08\1\u4e5c\1\u4eb0\1\u4f04\1\u4f58\1\u4fac\1\u5000\1\u5054"+
-    "\1\u50a8\1\u50fc\1\u5150\1\u51a4\1\u51f8\1\u524c\1\u52a0\1\u52f4"+
-    "\1\u5348\1\u539c\1\u53f0\1\u5444\1\u5498\1\u54ec\1\u5540\1\u5594"+
-    "\1\u55e8\1\u563c\1\u5690\1\u56e4\1\u5738\1\u578c\1\u57e0\1\u5834"+
-    "\1\u5888\1\u58dc\1\u5930\1\u5984\1\u59d8\1\u5a2c\1\u5a80\1\u5ad4"+
-    "\1\u5b28\1\u5b7c\1\u5bd0\1\u5c24\1\u5c78\1\u5ccc\1\u5d20\1\u5d74"+
-    "\1\u5dc8\1\u5e1c\1\u5e70\1\u5ec4\1\u5f18\1\u5f6c\1\u5fc0\1\u6014"+
-    "\1\u6068\1\u60bc\1\u6110\1\u6164\1\u61b8\1\u620c\1\u6260\1\u62b4"+
-    "\1\u6308\1\u635c\1\u63b0\1\u6404\1\u6458\1\u64ac\1\u6500\1\u6554"+
-    "\1\u65a8\1\u65fc\1\u6650\1\u66a4\1\u66f8\1\u674c\1\u67a0\1\u67f4"+
-    "\1\u6848\1\u689c\1\u68f0\1\u6944\1\u6998\1\u69ec\1\u6a40\1\u6a94"+
-    "\1\u6ae8\1\u6b3c\1\u6b90\1\u6be4\1\u6c38\1\u6c8c\1\u6ce0\1\u6d34"+
-    "\1\u6d88\1\u6ddc\1\u6e30\1\u6e84\1\u6ed8\1\u6f2c\1\u6f80\1\u6fd4"+
-    "\1\u7028\1\u707c\1\u70d0\1\u7124\1\u7178\1\u71cc\1\u7220\1\u7274"+
-    "\1\u72c8\1\u731c\1\u7370\1\u73c4\1\u7418\1\u746c\1\u74c0\1\u7514"+
-    "\1\u7568\1\u75bc\1\u7610\1\u7664\1\u76b8\1\u770c\1\u7760\1\u77b4"+
-    "\1\u7808\1\u785c\1\u78b0\1\u7904\1\u7958\1\u79ac\1\u7a00\1\u7a54"+
-    "\1\u7aa8\1\u7afc\1\u7b50\1\u7ba4\1\u7bf8\1\u7c4c\1\u7ca0\1\u7cf4"+
-    "\1\u7d48\1\u7d9c\1\u7df0\1\u7e44\1\u7e98\1\u7eec\1\u7f40\1\u7f94"+
-    "\1\u7fe8\1\u803c\1\u8090\1\u80e4\1\u8138\1\u818c\1\u81e0\1\u8234"+
-    "\1\u8288\1\u82dc\1\u8330\1\u8384\1\u83d8\1\u842c\1\u8480\1\u84d4"+
-    "\1\u8528\1\u857c\1\u85d0\1\u8624\1\u8678\1\u86cc\1\u8720\1\u8774"+
-    "\1\u87c8\1\u881c\1\u8870\1\u88c4\1\u8918\1\u896c\1\u89c0\1\u8a14"+
-    "\1\u8a68\1\u8abc\1\u8b10\1\u8b64\1\u8bb8\1\u8c0c\1\u8c60\1\u8cb4"+
-    "\1\u8d08\1\u8d5c\1\u8db0\1\u8e04\1\u8e58\1\u8eac\1\u8f00\1\u8f54"+
-    "\1\u8fa8\1\u8ffc\1\u9050\1\u90a4\1\u90f8\1\u914c\1\u91a0\1\u91f4"+
-    "\1\u9248\1\u929c\1\u92f0\1\u9344\1\u9398\1\u93ec\1\u9440\1\u9494"+
-    "\1\u94e8\1\u953c\1\u9590\1\u95e4\1\u9638\1\u968c\1\u96e0\1\u9734"+
-    "\1\u9788\1\u97dc\1\u9830\1\u9884\1\u98d8\1\u992c\1\u9980\1\u99d4"+
-    "\1\u9a28\1\u9a7c\1\u9ad0\1\u9b24\1\u9b78\1\u9bcc\1\u9c20\1\u9c74"+
-    "\1\u9cc8\1\u9d1c\1\u9d70\1\u9dc4\1\u9e18\1\u9e6c\1\u9ec0\1\u9f14"+
-    "\1\u9f68\1\u9fbc\1\ua010\1\ua064\1\ua0b8\1\ua10c\1\ua160\1\ua1b4"+
-    "\1\ua208\1\ua25c\1\ua2b0\1\ua304\1\ua358\1\ua3ac\0\250\0\374"+
-    "\0\374\0\u0a80\0\u0a80\0\u0ad4\0\u0b28\0\u0ec4\0\u03f0\1\ua400"+
-    "\1\ua454\0\u04ec\0\u7d58\1\ua4a8\1\ua4fc\0\u0f6c\0\u0fc0\1\ua550"+
-    "\0\u0348\0\u1014\0\u0ccc\1\ua5a4\1\ua5f8\1\ua64c\0\250\1\ua6a0"+
-    "\1\ua6f4\1\ua748\1\ua79c\1\ua7f0\1\ua844\1\ua898\1\ua8ec\1\ua940"+
-    "\1\ua994\0\u5dd8\1\ua9e8\1\uaa3c\1\uaa90\1\uaae4\1\uab38\1\uab8c"+
-    "\1\uabe0\1\uac34\1\uac88\1\uacdc\0\u69fc\1\uad30\1\uad84\1\uadd8"+
-    "\1\uae2c\1\uae80\1\uaed4\1\uaf28\1\uaf7c\1\uafd0\1\ub024\1\ub078"+
-    "\1\ub0cc\1\ub120\1\ub174\1\ub1c8\1\ub21c\1\ub270\1\ub2c4\1\ub318"+
-    "\1\ub36c\0\u6708\1\ub3c0\1\ub414\1\ub468\1\ub4bc\1\ub510\1\ub564"+
-    "\1\ub5b8\1\ub60c\1\ub660\1\ub6b4\1\ub708\1\ub75c\1\ub7b0\1\ub804"+
-    "\1\ub858\1\ub8ac\0\u8880\1\ub900\1\ub954\0\u7bb4\1\ub9a8\0\u95f4"+
-    "\1\ub9fc\1\uba50\1\ubaa4\1\ubaf8\1\ubb4c\1\ubba0\1\ubbf4\1\ubc48"+
-    "\1\ubc9c\1\ubcf0\1\ubd44\1\ubd98\1\ubdec\1\ube40\1\ube94\1\ubee8"+
-    "\1\ubf3c\1\ubf90\1\ubfe4\1\uc038\1\uc08c\1\uc0e0\1\uc134\1\uc188"+
-    "\1\uc1dc\1\uc230\1\uc284\1\uc2d8\1\uc32c\1\uc380\1\uc3d4\1\uc428"+
-    "\1\uc47c\1\uc4d0\1\uc524\1\uc578\1\uc5cc\1\ub2c4\1\uc620\1\uc674"+
-    "\1\uc6c8\1\uc71c\1\uc770\1\uc7c4\1\uc818\1\uc86c\1\uc8c0\1\uc914"+
-    "\1\uc968\1\uc9bc\1\uca10\1\uca64\1\ucab8\1\ucb0c\1\ucb60\1\ucbb4"+
-    "\1\ucc08\1\ucc5c\1\uccb0\1\ucd04\1\ucd58\1\ucdac\1\uce00\1\uce54"+
-    "\1\ucea8\0\u80f4\1\ucefc\1\ucf50\1\ucfa4\1\ucff8\1\ud04c\1\ud0a0"+
-    "\1\ud0f4\1\ud148\1\ud19c\1\ud1f0\1\ud244\1\ud298\1\ud2ec\1\ud340"+
-    "\1\ud394\1\ud3e8\1\ud43c\1\ud490\1\ud4e4\1\ud538\1\ud58c\1\ud5e0"+
-    "\1\ud634\1\ud688\1\ud6dc\1\ud730\1\ud784\1\ud7d8\1\ud82c\1\ud880"+
-    "\1\ud8d4\1\ud928\1\ud97c\1\ud9d0\1\uda24\1\uda78\1\udacc\1\udb20"+
-    "\1\udb74\1\udbc8\1\udc1c\1\udc70\1\udcc4\1\udd18\1\udd6c\1\uddc0"+
-    "\1\ude14\1\ude68\1\udebc\1\udf10\1\udf64\1\udfb8\1\ue00c\1\ue060"+
-    "\1\ub318\1\ue0b4\1\ue108\1\ue15c\1\ue1b0\1\ue204\1\ue258\1\ue2ac"+
-    "\1\ue300\1\ue354\1\ue3a8\0\u5454\1\ue3fc\1\ue450\1\ue4a4\1\ue4f8"+
-    "\1\ue54c\1\ue5a0\1\ue5f4\1\ue648\1\ue69c\1\ue6f0\1\ue744\1\ue798"+
-    "\0\u8bc8\1\ucc08\1\ue7ec\1\ue840\1\ue894\1\ue8e8\1\ue93c\1\ue990"+
-    "\1\ue9e4\1\uea38\1\uea8c\1\ueae0\1\ueb34\1\ueb88\1\uebdc\1\uec30"+
-    "\1\uec84\1\uecd8\1\ued2c\1\ued80\1\uedd4\1\uee28\1\uee7c\1\ueed0"+
-    "\1\uef24\1\uef78\1\uefcc\1\uf020\1\uf074\1\uf0c8\1\uf11c\1\uf170"+
-    "\1\uf1c4\1\uf218\1\uf26c\1\uf2c0\1\uf314\1\ud19c\1\uf368\1\uf3bc"+
-    "\1\uf410\1\uf464\1\uf4b8\1\uf50c\1\uf560\1\ue354\1\uf5b4\1\uf608"+
-    "\1\uf65c\1\uf6b0\1\uf704\1\uf758\1\uf7ac\1\uf800\1\uf854\1\uf8a8"+
-    "\1\uf8fc\1\uf950\1\uf9a4\1\uf9f8\1\ufa4c\1\ufaa0\1\ufaf4\1\ufb48"+
-    "\1\ufb9c\1\ufbf0\1\ufc44\1\ufc98\1\ufcec\1\ufd40\1\ufd94\0\u7968"+
-    "\1\ufde8\1\ufe3c\1\ufe90\1\ue93c\1\ufee4\1\uff38\1\uff8c\1\uffe0"+
-    "\2\64\2\210\2\334\2\u0130\2\u0184\2\u01d8\2\u022c\2\u0280"+
-    "\2\u02d4\0\u6120\0\u9204\2\u0328\2\u037c\2\u03d0\2\u0424\2\u0478"+
-    "\2\u04cc\2\u0520\2\u0574\2\u05c8\2\u061c\2\u0670\2\u06c4\0\u621c"+
-    "\2\u0718\2\u076c\2\u07c0\2\u0814\2\u0868\2\u08bc\2\u0910\2\u0964"+
-    "\2\u09b8\2\u0a0c\2\u0a60\2\u0ab4\2\u0b08\2\u0b5c\2\u0bb0\2\u0c04"+
-    "\2\u0c58\2\u0cac\2\u0d00\2\u0d54\2\u0da8\2\u0dfc\2\u0e50\2\u0ea4"+
-    "\2\u0ef8\2\u0f4c\2\u0fa0\2\u0ff4\2\u1048\2\u109c\2\u10f0\2\u1144"+
-    "\2\u1198\2\u11ec\2\u1240\2\u1294\2\u12e8\2\u133c\2\u1390\2\u13e4"+
-    "\2\u1438\2\u148c\2\u14e0\2\u1534\2\u1588\2\u15dc\2\u1630\0\u81f0"+
-    "\2\u1684\2\u16d8\2\u172c\2\u1780\2\u17d4\2\u1828\2\u187c\2\u18d0"+
-    "\2\u1924\2\u1978\2\u19cc\2\u1a20\2\u1a74\2\u1ac8\2\u1b1c\2\u1b70"+
-    "\2\u1bc4\2\u1c18\2\u1c6c\2\u1cc0\2\u1d14\2\u1d68\2\u1dbc\2\u1e10"+
-    "\2\u1e64\2\u1eb8\2\u1f0c\2\u1f60\2\u1fb4\2\u2008\2\u205c\2\u20b0"+
-    "\2\u2104\2\u2158\2\u21ac\2\u2200\2\u2254\2\u22a8\2\u22fc\2\u2350"+
-    "\2\u23a4\2\u23f8\2\u244c\2\u24a0\2\u24f4\2\u2548\2\u259c\2\u25f0"+
-    "\0\u8244\2\u2644\2\u2698\2\u26ec\2\u2740\2\u2794\1\ubdec\2\u27e8"+
-    "\2\u283c\2\u2890\2\u28e4\2\u2938\2\u298c\2\u29e0\2\u2a34\2\u2a88"+
-    "\2\u2adc\2\u2b30\2\u2b84\2\u2bd8\2\u2c2c\2\u2c80\2\u2cd4\2\u2d28"+
-    "\2\u2d7c\2\u2dd0\2\u2e24\2\u2e78\2\u2ecc\2\u2f20\2\u2f74\2\u2fc8"+
-    "\2\u301c\2\u3070\2\u30c4\2\u3118\2\u316c\2\u31c0\2\u3214\2\u3268"+
-    "\2\u32bc\2\u1bc4\2\u3310\2\u3364\2\u33b8\2\u340c\2\u3460\2\u34b4"+
-    "\2\u09b8\2\u3508\2\u355c\2\u35b0\2\u3604\2\u3658\2\u36ac\2\u3700"+
-    "\2\u3754\2\u37a8\2\u37fc\2\u3850\2\u38a4\2\u38f8\2\u394c\2\u39a0"+
-    "\2\u39f4\2\u3a48\2\u3a9c\2\u3af0\2\u3b44\2\u3b98\2\u3bec\2\u3c40"+
-    "\2\u3c94\2\u3ce8\2\u3d3c\2\u3d90\2\u3de4\2\u3e38\2\u3e8c\0\u747c"+
-    "\2\u3ee0\2\u3f34\2\u3f88\2\u3fdc\2\u4030\2\u4084\2\u40d8\2\u412c"+
-    "\2\u4180\2\u41d4\2\u4228\2\u427c\2\u42d0\0\u2274\0\ud1ac\2\u4324"+
-    "\2\u4378\2\u43cc\0\u0348\2\u4420\2\u4474\2\u44c8\2\u451c\2\u4570"+
-    "\2\u45c4\2\u4618\2\u466c\2\u46c0\2\u4714\0\ub22c\2\u4768\2\u47bc"+
-    "\2\u4810\2\u4864\2\u48b8\2\u490c\2\u4960\2\u49b4\2\u4a08\2\u4a5c"+
-    "\0\ube50\2\u4ab0\2\u4b04\2\u4b58\2\u4bac\2\u4c00\2\u4c54\2\u4ca8"+
-    "\2\u4cfc\2\u4d50\2\u4da4\2\u4df8\2\u4e4c\2\u4ea0\2\u4ef4\2\u4f48"+
-    "\2\u4f9c\2\u4ff0\2\u5044\2\u5098\2\u50ec\0\ubb5c\2\u5140\2\u5194"+
-    "\2\u51e8\2\u523c\2\u5290\2\u52e4\2\u5338\2\u538c\2\u53e0\2\u5434"+
-    "\2\u5488\2\u54dc\2\u5530\2\u5584\2\u55d8\2\u562c\0\udcd4\2\u5680"+
-    "\2\u56d4\0\ud008\2\u5728\0\uea48\2\u577c\2\u57d0\2\u5824\2\u5878"+
-    "\2\u58cc\2\u5920\2\u5974\2\u59c8\2\u5a1c\2\u5a70\2\u5ac4\2\u5b18"+
-    "\2\u5b6c\2\u5bc0\2\u5c14\2\u5c68\2\u5cbc\2\u5d10\2\u5d64\2\u5db8"+
-    "\2\u5e0c\2\u5e60\2\u5eb4\2\u5f08\2\u5f5c\2\u5fb0\2\u6004\2\u6058"+
-    "\2\u60ac\2\u6100\2\u6154\2\u61a8\2\u61fc\2\u6250\2\u62a4\2\u62f8"+
-    "\2\u634c\2\u5044\2\u63a0\2\u63f4\2\u6448\2\u649c\2\u64f0\2\u6544"+
-    "\2\u6598\2\u65ec\2\u6640\2\u6694\2\u66e8\2\u673c\2\u6790\2\u67e4"+
-    "\2\u6838\2\u688c\2\u68e0\2\u6934\2\u6988\2\u69dc\2\u6a30\2\u6a84"+
-    "\2\u6ad8\2\u6b2c\2\u6b80\2\u6bd4\2\u6c28\0\ud548\2\u6c7c\2\u6cd0"+
-    "\2\u6d24\2\u6d78\2\u6dcc\2\u6e20\2\u6e74\2\u6ec8\2\u6f1c\2\u6f70"+
-    "\2\u6fc4\2\u7018\2\u706c\2\u70c0\2\u7114\2\u7168\2\u71bc\2\u7210"+
-    "\2\u7264\2\u72b8\2\u730c\2\u7360\2\u73b4\2\u7408\2\u745c\2\u74b0"+
-    "\2\u7504\2\u7558\2\u75ac\2\u7600\2\u7654\2\u76a8\2\u76fc\2\u7750"+
-    "\2\u77a4\2\u77f8\2\u784c\2\u78a0\2\u78f4\2\u7948\2\u799c\2\u79f0"+
-    "\2\u7a44\2\u7a98\2\u7aec\2\u7b40\2\u7b94\2\u7be8\2\u7c3c\2\u7c90"+
-    "\2\u7ce4\2\u7d38\2\u7d8c\2\u7de0\2\u5098\2\u7e34\2\u7e88\2\u7edc"+
-    "\2\u7f30\2\u7f84\2\u7fd8\2\u802c\2\u8080\2\u80d4\2\u8128\0\ua8a8"+
-    "\2\u817c\2\u81d0\2\u8224\2\u8278\2\u82cc\2\u8320\2\u8374\2\u83c8"+
-    "\2\u841c\2\u8470\2\u84c4\2\u8518\0\ue01c\2\u6988\2\u856c\2\u85c0"+
-    "\2\u8614\2\u8668\2\u86bc\2\u8710\2\u8764\2\u87b8\2\u880c\2\u8860"+
-    "\2\u88b4\2\u8908\2\u895c\2\u89b0\2\u8a04\2\u8a58\2\u8aac\2\u8b00"+
-    "\2\u8b54\2\u8ba8\2\u8bfc\2\u8c50\2\u8ca4\2\u8cf8\2\u8d4c\2\u8da0"+
-    "\2\u8df4\2\u8e48\2\u8e9c\2\u8ef0\2\u8f44\2\u8f98\2\u8fec\2\u9040"+
-    "\2\u9094\2\u6f1c\2\u90e8\2\u913c\2\u9190\2\u91e4\2\u9238\2\u928c"+
-    "\2\u92e0\2\u80d4\2\u9334\2\u9388\2\u93dc\2\u9430\2\u9484\2\u94d8"+
-    "\2\u952c\2\u9580\2\u95d4\2\u9628\2\u967c\2\u96d0\2\u9724\2\u9778"+
-    "\2\u97cc\2\u9820\2\u9874\2\u98c8\2\u991c\2\u9970\2\u99c4\2\u9a18"+
-    "\2\u9a6c\2\u9ac0\2\u9b14\0\ucdbc\2\u9b68\2\u9bbc\2\u9c10\2\u86bc"+
-    "\2\u9c64\2\u9cb8\2\u9d0c\2\u9d60\2\u9db4\2\u9e08\2\u9e5c\2\u9eb0"+
-    "\2\u9f04\2\u9f58\2\u9fac\2\ua000\2\ua054\0\ub574\0\ue658\2\ua0a8"+
-    "\2\ua0fc\2\ua150\2\ua1a4\2\ua1f8\2\ua24c\2\ua2a0\2\ua2f4\2\ua348"+
-    "\2\ua39c\2\ua3f0\2\ua444\0\ub670\2\ua498\2\ua4ec\2\ua540\2\ua594"+
-    "\2\ua5e8\2\ua63c\2\ua690\2\ua6e4\2\ua738\2\ua78c\2\ua7e0\2\ua834"+
-    "\2\ua888\2\ua8dc\2\ua930\2\ua984\2\ua9d8\2\uaa2c\2\uaa80\2\uaad4"+
-    "\2\uab28\2\uab7c\2\uabd0\2\uac24\2\uac78\2\uaccc\2\uad20\2\uad74"+
-    "\2\uadc8\2\uae1c\2\uae70\2\uaec4\2\uaf18\2\uaf6c\2\uafc0\2\ub014"+
-    "\2\ub068\2\ub0bc\2\ub110\2\ub164\2\ub1b8\2\ub20c\2\ub260\2\ub2b4"+
-    "\2\ub308\2\ub35c\2\ub3b0\0\ud644\2\ub404\2\ub458\2\ub4ac\2\ub500"+
-    "\2\ub554\2\ub5a8\2\ub5fc\2\ub650\2\ub6a4\2\ub6f8\2\ub74c\2\ub7a0"+
-    "\2\ub7f4\2\ub848\2\ub89c\2\ub8f0\2\ub944\2\ub998\2\ub9ec\2\uba40"+
-    "\2\uba94\2\ubae8\2\ubb3c\2\ubb90\2\ubbe4\2\ubc38\2\ubc8c\2\ubce0"+
-    "\2\ubd34\2\ubd88\2\ubddc\2\ube30\2\ube84\2\ubed8\2\ubf2c\2\ubf80"+
-    "\2\ubfd4\2\uc028\2\uc07c\2\uc0d0\2\uc124\2\uc178\2\uc1cc\2\uc220"+
-    "\2\uc274\2\uc2c8\2\uc31c\2\uc370\0\ud698\2\uc3c4\2\uc418\2\uc46c"+
-    "\2\uc4c0\2\uc514\2\u5b6c\2\uc568\2\uc5bc\2\uc610\2\uc664\2\uc6b8"+
-    "\2\uc70c\2\uc760\2\uc7b4\2\uc808\2\uc85c\2\uc8b0\2\uc904\2\uc958"+
-    "\2\uc9ac\2\uca00\2\uca54\2\ucaa8\2\ucafc\2\ucb50\2\ucba4\2\ucbf8"+
-    "\2\ucc4c\2\ucca0\2\uccf4\2\ucd48\2\ucd9c\2\ucdf0\2\uce44\2\uce98"+
-    "\2\uceec\2\ucf40\2\ucf94\2\ucfe8\2\ud03c\2\ub944\2\ud090\2\ud0e4"+
-    "\2\ud138\2\ud18c\2\ud1e0\2\ud234\2\ua738\2\ud288\2\ud2dc\2\ud330"+
-    "\2\ud384\2\ud3d8\2\ud42c\2\ud480\2\ud4d4\2\ud528\2\ud57c\2\ud5d0"+
-    "\2\ud624\2\ud678\2\ud6cc\2\ud720\2\ud774\2\ud7c8\2\ud81c\2\ud870"+
-    "\2\ud8c4\2\ud918\2\ud96c\2\ud9c0\2\uda14\2\uda68\2\udabc\2\udb10"+
-    "\2\udb64\2\udbb8\0\uc8d0\2\udc0c\2\udc60\2\udcb4\2\udd08\2\udd5c"+
-    "\2\uddb0\2\ude04\2\ude58\2\udeac\2\udf00\2\udf54\2\udfa8\2\udffc"+
-    "\2\ue050\2\ue0a4\2\ue0f8\2\ue14c\2\ue1a0\2\ue1f4\2\ue248\2\ue29c"+
-    "\2\ue2f0\2\ue344\2\ue398\2\ue3ec\2\ue440\2\ue494\2\ue4e8\2\ue53c"+
-    "\2\ue590\2\ue5e4\2\ue638\2\ue68c\2\ue6e0\2\ue734\2\ue788\2\ue7dc"+
-    "\2\ue830\2\ue884\2\ue8d8\2\ue92c\2\ue980\2\ue9d4\2\uea28\2\uea7c"+
-    "\2\uead0\2\ueb24\2\ueb78\2\uebcc\2\uec20\2\uec74\2\uecc8\2\ued1c"+
-    "\2\ued70\2\uedc4\2\uee18\2\uee6c\2\ueec0\2\uef14\2\uef68\2\uefbc"+
-    "\2\uf010\2\uf064\2\uf0b8\2\uf10c\2\uf160\2\uf1b4\2\uf208\2\uf25c"+
-    "\2\uf2b0\2\uf304\2\uf358\2\uf3ac\2\uf400\2\uf454\2\uf4a8\2\uf4fc"+
-    "\2\uf550\2\uf5a4\2\uf5f8\2\uf64c\2\uf6a0\2\uf6f4\2\uf748\2\uf79c"+
-    "\2\uf7f0\2\uf844\2\uf898\2\uf8ec\2\uf940\2\uf994\2\uf9e8\2\ufa3c"+
-    "\2\ufa90\2\ufae4\2\ufb38\2\ufb8c\2\ufbe0\2\ufc34\2\ufc88\2\ufcdc"+
-    "\2\ufd30\2\ufd84\2\ufdd8\2\ufe2c\2\ufe80\2\ufed4\2\uff28\2\uff7c"+
-    "\2\uffd0\3\44\3\170\3\314\3\u0120\3\u0174\3\u01c8\3\u021c"+
-    "\3\u0270\3\u02c4\3\u0318\3\u036c\3\u03c0\3\u0414\3\u0468\3\u04bc"+
-    "\3\u0510\3\u0564\3\u05b8\3\u060c\3\u0660\3\u06b4\3\u0708\3\u075c"+
-    "\3\u07b0\3\u0804\3\u0858\3\u08ac\3\u0900\3\u0954\3\u09a8\3\u09fc"+
-    "\3\u0a50\3\u0aa4\3\u0af8\3\u0b4c\3\u0ba0\3\u0bf4\3\u0c48\3\u0c9c"+
-    "\3\u0cf0\3\u0d44\3\u0d98\3\u0dec\3\u0e40\3\u0e94\3\u0ee8\3\u0f3c"+
-    "\3\u0f90\3\u0fe4\3\u1038\3\u108c\3\u10e0\3\u1134\3\u1188\3\u11dc"+
-    "\3\u1230\3\u1284\3\u12d8\3\u132c\3\u1380\3\u13d4\3\u1428\3\u147c"+
-    "\3\u14d0\3\u1524\3\u1578\3\u15cc\3\u1620\3\u1674\3\u16c8\3\u171c"+
-    "\3\u1770\3\u17c4\3\u1818\3\u186c\3\u18c0\3\u1914\3\u1968\3\u19bc"+
-    "\3\u1a10\3\u1a64\3\u1ab8\3\u1b0c\3\u1b60\3\u1bb4\3\u1c08\3\u1c5c"+
-    "\3\u1cb0\3\u1d04\3\u1d58\3\u1dac\3\u1e00\3\u1e54\3\u1ea8\3\u1efc"+
-    "\3\u1f50\3\u1fa4\3\u1ff8\3\u204c\3\u20a0\3\u20f4\3\u2148\3\u219c"+
-    "\3\u21f0\3\u2244\3\u2298\3\u22ec\3\u2340\3\u2394\3\u23e8\3\u243c"+
-    "\3\u2490\3\u24e4\3\u2538\3\u258c\3\u25e0\3\u2634\3\u2688\3\u26dc"+
-    "\3\u2730\3\u2784\3\u27d8\3\u282c\3\u2880\3\u28d4\3\u2928\3\u297c"+
-    "\3\u29d0\3\u2a24\3\u2a78\3\u2acc\3\u2b20\3\u2b74\3\u2bc8\3\u2c1c"+
-    "\3\u2c70\3\u2cc4\3\u2d18\3\u2d6c\3\u2dc0\3\u2e14\3\u2e68\3\u2ebc"+
-    "\3\u2f10\3\u2f64\3\u2fb8\3\u300c\3\u3060\3\u30b4\3\u3108\3\u315c"+
-    "\3\u31b0\3\u3204\3\u3258\3\u32ac\3\u3300\3\u3354\3\u33a8\3\u33fc"+
-    "\3\u3450\3\u34a4\3\u34f8\3\u354c\3\u35a0\3\u35f4\3\u3648\3\u369c"+
-    "\3\u36f0\3\u3744\3\u3798\3\u37ec\3\u3840\3\u3894\3\u38e8\3\u393c"+
-    "\3\u3990\3\u39e4\3\u3a38\3\u3a8c\3\u3ae0\3\u3b34\3\u3b88\3\u3bdc"+
-    "\3\u3c30\3\u3c84\3\u3cd8\3\u3d2c\3\u3d80\3\u3dd4\3\u3e28\3\u3e7c"+
-    "\3\u3ed0\3\u3f24\3\u3f78\3\u3fcc\3\u4020\3\u4074\3\u40c8\3\u411c"+
-    "\3\u4170\3\u41c4\3\u4218\3\u426c\3\u42c0\3\u4314\3\u4368\3\u43bc"+
-    "\3\u4410\3\u4464\3\u44b8\3\u450c\3\u4560\3\u45b4\3\u4608\3\u465c"+
-    "\3\u46b0\3\u4704\3\u4758\3\u47ac\3\u4800\3\u4854\3\u48a8\3\u48fc"+
-    "\3\u4950\3\u49a4\3\u49f8\3\u4a4c\3\u4aa0\3\u4af4\3\u4b48\3\u4b9c"+
-    "\3\u4bf0\3\u4c44\3\u4c98\3\u4cec\3\u4d40\3\u4d94\3\u4de8\3\u4e3c"+
-    "\3\u4e90\3\u4ee4\3\u4f38\3\u4f8c\3\u4fe0\3\u5034\3\u5088\3\u50dc"+
-    "\3\u5130\3\u5184\3\u51d8\3\u522c\3\u5280\3\u52d4\3\u5328\3\u537c"+
-    "\3\u53d0\3\u5424\3\u5478\3\u54cc\3\u5520\3\u5574\3\u55c8\3\u561c"+
-    "\3\u5670\3\u56c4\3\u5718\3\u576c\3\u57c0\3\u5814\3\u5868\3\u58bc"+
-    "\3\u5910\3\u5964\3\u59b8\3\u5a0c\3\u5a60\3\u5ab4\3\u5b08\3\u5b5c"+
-    "\3\u5bb0\3\u5c04\3\u5c58\3\u5cac\3\u5d00\3\u5d54\3\u5da8\3\u5dfc"+
-    "\3\u5e50\3\u5ea4\3\u5ef8\3\u5f4c\3\u5fa0\3\u5ff4\3\u6048\3\u609c"+
-    "\3\u60f0\3\u6144\3\u6198\3\u61ec\3\u6240\3\u6294\3\u62e8\3\u633c"+
-    "\3\u6390\3\u63e4\3\u6438\3\u648c\3\u64e0\3\u6534\3\u6588\3\u65dc"+
-    "\3\u6630\3\u6684\3\u66d8\3\u672c\3\u6780\3\u67d4\3\u6828\3\u687c"+
-    "\3\u68d0\3\u6924\3\u6978\3\u69cc\3\u6a20\3\u6a74\3\u6ac8\3\u6b1c"+
-    "\3\u6b70\3\u6bc4\3\u6c18\3\u6c6c\3\u6cc0\3\u6d14\3\u6d68\3\u6dbc"+
-    "\3\u6e10\3\u6e64\3\u6eb8\3\u6f0c\3\u6f60\3\u6fb4\3\u7008\3\u705c"+
-    "\3\u70b0\3\u7104\3\u7158\3\u71ac\3\u7200\3\u7254\3\u72a8\3\u72fc"+
-    "\3\u7350\3\u73a4\3\u73f8\3\u744c\3\u74a0\3\u74f4\3\u7548\3\u759c"+
-    "\3\u75f0\3\u7644\3\u7698\3\u76ec\3\u7740\3\u7794\3\u77e8\3\u783c"+
-    "\3\u7890\3\u78e4\3\u7938\3\u798c\3\u79e0\3\u7a34\3\u7a88\3\u7adc"+
-    "\3\u7b30\3\u7b84\3\u7bd8\3\u7c2c\3\u7c80\3\u7cd4\3\u7d28\3\u7d7c"+
-    "\3\u7dd0\3\u7e24\3\u7e78\3\u7ecc\3\u7f20\3\u7f74\3\u7fc8\3\u801c"+
-    "\3\u8070\3\u80c4\3\u8118\3\u816c\3\u81c0\3\u8214\3\u8268\3\u82bc"+
-    "\3\u8310\3\u8364\3\u83b8\3\u840c\3\u8460\3\u84b4\3\u8508\3\u855c"+
-    "\3\u85b0\3\u8604\3\u8658\3\u86ac\3\u8700\3\u8754\3\u87a8\3\u87fc"+
-    "\3\u8850\3\u88a4\3\u88f8\3\u894c\3\u89a0\3\u89f4\3\u8a48\3\u8a9c"+
-    "\3\u8af0\3\u8b44\3\u8b98\3\u8bec\3\u8c40\3\u8c94\3\u8ce8\3\u8d3c"+
-    "\3\u8d90\3\u8de4\3\u8e38\3\u8e8c\3\u8ee0\3\u8f34\3\u8f88\3\u8fdc"+
-    "\3\u9030\3\u9084\3\u90d8\3\u912c\3\u9180\3\u91d4\3\u9228\3\u927c"+
-    "\3\u92d0\3\u9324\3\u9378\3\u93cc\3\u9420\3\u9474\3\u94c8\3\u951c"+
-    "\3\u9570\3\u95c4\3\u9618\3\u966c\3\u96c0\3\u9714\3\u9768\3\u97bc"+
-    "\3\u9810\3\u9864\3\u98b8\3\u990c\3\u9960\3\u99b4\3\u9a08\3\u9a5c"+
-    "\3\u9ab0\3\u9b04\3\u9b58\3\u9bac\3\u9c00\3\u9c54\3\u9ca8\3\u9cfc"+
-    "\3\u9d50\3\u9da4\3\u9df8\3\u9e4c\3\u9ea0\3\u9ef4\3\u9f48\3\u9f9c"+
-    "\3\u9ff0\3\ua044\3\ua098\3\ua0ec\3\ua140\3\ua194\3\ua1e8\3\ua23c"+
-    "\3\ua290\3\ua2e4\3\ua338\3\ua38c\3\ua3e0\3\ua434\3\ua488\3\ua4dc"+
-    "\3\ua530\3\ua584\3\ua5d8\3\ua62c\3\ua680\3\ua6d4\3\ua728\3\ua77c"+
-    "\3\ua7d0\3\ua824\3\ua878\3\ua8cc\3\ua920\3\ua974\3\ua9c8\3\uaa1c"+
-    "\3\uaa70\3\uaac4\3\uab18\3\uab6c\3\uabc0\3\uac14\3\uac68\3\uacbc"+
-    "\3\uad10\3\uad64\3\uadb8\3\uae0c\3\uae60\3\uaeb4\3\uaf08\3\uaf5c"+
-    "\3\uafb0\3\ub004\3\ub058\3\ub0ac\3\ub100\3\ub154\3\ub1a8\3\ub1fc"+
-    "\3\ub250\3\ub2a4\3\ub2f8\3\ub34c\3\ub3a0\3\ub3f4\3\ub448\3\ub49c"+
-    "\3\ub4f0\3\ub544\3\ub598\3\ub5ec\3\ub640\3\ub694\3\ub6e8\3\ub73c"+
-    "\3\ub790\3\ub7e4\3\ub838\3\ub88c\3\ub8e0\3\ub934\3\ub988\3\ub9dc"+
-    "\3\uba30\3\uba84\3\ubad8\3\ubb2c\3\ubb80\3\ubbd4\3\ubc28\3\ubc7c"+
-    "\3\ubcd0\3\ubd24\3\ubd78\3\ubdcc\3\ube20\3\ube74\3\ubec8\3\ubf1c"+
-    "\3\ubf70\3\ubfc4\3\uc018\3\uc06c\3\uc0c0\3\uc114\3\uc168\3\uc1bc"+
-    "\3\uc210\3\uc264\3\uc2b8\3\uc30c\3\uc360\3\uc3b4\3\uc408\3\uc45c"+
-    "\3\uc4b0\3\uc504\3\uc558\3\uc5ac\3\uc600\3\uc654\3\uc6a8\3\uc6fc"+
-    "\3\uc750\3\uc7a4\3\uc7f8\3\uc84c\3\uc8a0\3\uc8f4\3\uc948\3\uc99c"+
-    "\3\uc9f0\3\uca44\3\uca98\3\ucaec\3\ucb40\3\ucb94\3\ucbe8\3\ucc3c"+
-    "\3\ucc90\3\ucce4\3\ucd38\3\ucd8c\3\ucde0\3\uce34\3\uce88\3\ucedc"+
-    "\3\ucf30\3\ucf84\3\ucfd8\3\ud02c\3\ud080\3\ud0d4\3\ud128\3\ud17c"+
-    "\3\ud1d0\3\ud224\3\ud278\3\ud2cc\3\ud320\3\ud374\3\ud3c8\3\ud41c"+
-    "\3\ud470\3\ud4c4\3\ud518\3\ud56c\3\ud5c0\3\ud614\3\ud668\3\ud6bc"+
-    "\3\ud710\3\ud764\3\ud7b8\3\ud80c\3\ud860\3\ud8b4\3\ud908\3\ud95c"+
-    "\3\ud9b0\3\uda04\3\uda58\3\udaac\3\udb00\3\udb54\3\udba8\3\udbfc"+
-    "\3\udc50\3\udca4\3\udcf8\3\udd4c\3\udda0\3\uddf4\3\ude48\3\ude9c"+
-    "\3\udef0\3\udf44\3\udf98\3\udfec\3\ue040\3\ue094\3\ue0e8\3\ue13c"+
-    "\3\ue190\3\ue1e4\3\ue238\3\ue28c\3\ue2e0\3\ue334\3\ue388\3\ue3dc"+
-    "\3\ue430\3\ue484\3\ue4d8\3\ue52c\3\ue580\3\ue5d4\3\ue628\3\ue67c"+
-    "\3\ue6d0\3\ue724\3\ue778\3\ue7cc\3\ue820\3\ue874\3\ue8c8\3\ue91c"+
-    "\3\ue970\3\ue9c4\3\uea18\3\uea6c\3\ueac0\3\ueb14\3\ueb68\3\uebbc"+
-    "\3\uec10\3\uec64\3\uecb8\3\ued0c\3\ued60\3\uedb4\3\uee08\3\uee5c"+
-    "\3\ueeb0\3\uef04\3\uef58\3\uefac\3\uf000\3\uf054\3\uf0a8\3\uf0fc"+
-    "\3\uf150\3\uf1a4\3\uf1f8\3\uf24c\3\uf2a0\3\uf2f4\3\uf348\3\uf39c"+
-    "\3\uf3f0\3\uf444\3\uf498\3\uf4ec\3\uf540\3\uf594\3\uf5e8\3\uf63c"+
-    "\3\uf690\3\uf6e4\3\uf738\3\uf78c\3\uf7e0\3\uf834\3\uf888\3\uf8dc"+
-    "\3\uf930\3\uf984\3\uf9d8\3\ufa2c\3\ufa80\3\ufad4\3\ufb28\3\ufb7c"+
-    "\3\ufbd0\3\ufc24\3\ufc78\3\ufccc\3\ufd20\3\ufd74\3\ufdc8\3\ufe1c"+
-    "\3\ufe70\3\ufec4\3\uff18\3\uff6c\3\uffc0\4\24\4\150\4\274"+
-    "\4\u0110\4\u0164\4\u01b8\4\u020c\4\u0260\4\u02b4\4\u0308\4\u035c"+
-    "\4\u03b0\4\u0404\4\u0458\4\u04ac\4\u0500\4\u0554\4\u05a8\4\u05fc"+
-    "\4\u0650\4\u06a4\4\u06f8\4\u074c\4\u07a0\4\u07f4\4\u0848\4\u089c"+
-    "\4\u08f0\4\u0944\4\u0998\4\u09ec\4\u0a40\4\u0a94\4\u0ae8\4\u0b3c"+
-    "\4\u0b90\4\u0be4\4\u0c38\4\u0c8c\4\u0ce0\4\u0d34\4\u0d88\4\u0ddc"+
-    "\4\u0e30\4\u0e84\4\u0ed8\4\u0f2c\4\u0f80\4\u0fd4\4\u1028\4\u107c"+
-    "\4\u10d0\4\u1124\4\u1178\4\u11cc\4\u1220\4\u1274\4\u12c8\4\u131c"+
-    "\4\u1370\4\u13c4\4\u1418\4\u146c\4\u14c0\4\u1514\4\u1568\4\u15bc"+
-    "\4\u1610\4\u1664\4\u16b8\4\u170c\4\u1760\4\u17b4\4\u1808\4\u185c"+
-    "\4\u18b0\4\u1904\4\u1958\4\u19ac\4\u1a00\4\u1a54\4\u1aa8\4\u1afc"+
-    "\4\u1b50\4\u1ba4\4\u1bf8\4\u1c4c\4\u1ca0\4\u1cf4\4\u1d48\4\u1d9c"+
-    "\4\u1df0\4\u1e44\4\u1e98\4\u1eec\4\u1f40\4\u1f94\4\u1fe8\4\u203c"+
-    "\4\u2090\4\u20e4\4\u2138\4\u218c\4\u21e0\4\u2234\4\u2288\4\u22dc"+
-    "\4\u2330\4\u2384\4\u23d8\4\u242c\4\u2480\4\u24d4\4\u2528\4\u257c"+
-    "\4\u25d0\4\u2624\4\u2678\4\u26cc\4\u2720\4\u2774\4\u27c8\4\u281c"+
-    "\4\u2870\4\u28c4\4\u2918\4\u296c\4\u29c0\4\u2a14\4\u2a68\4\u2abc"+
-    "\4\u2b10\4\u2b64\4\u2bb8\4\u2c0c\4\u2c60\4\u2cb4\4\u2d08\4\u2d5c"+
-    "\4\u2db0\4\u2e04\4\u2e58\4\u2eac\4\u2f00\4\u2f54\4\u2fa8\4\u2ffc"+
-    "\4\u3050\4\u30a4\4\u30f8\4\u314c\4\u31a0\4\u31f4\4\u3248\4\u329c"+
-    "\4\u32f0\4\u3344\4\u3398\4\u33ec\4\u3440\4\u3494\4\u34e8\4\u353c"+
-    "\4\u3590\4\u35e4\4\u3638\4\u368c\4\u36e0\4\u3734\4\u3788\4\u37dc"+
-    "\4\u3830\4\u3884\4\u38d8\4\u392c\4\u3980\4\u39d4\4\u3a28\4\u3a7c"+
-    "\4\u3ad0\4\u3b24\4\u3b78\4\u3bcc\4\u3c20\4\u3c74\4\u3cc8\4\u3d1c"+
-    "\4\u3d70\4\u3dc4\4\u3e18\4\u3e6c\4\u3ec0\4\u3f14\4\u3f68\4\u3fbc"+
-    "\4\u4010\4\u4064\4\u40b8\4\u410c\4\u4160\4\u41b4\4\u4208\4\u425c"+
-    "\4\u42b0\4\u4304\4\u4358\4\u43ac\4\u4400\4\u4454\4\u44a8\4\u44fc"+
-    "\4\u4550\4\u45a4\4\u45f8\4\u464c\4\u46a0\4\u46f4\4\u4748\4\u479c"+
-    "\4\u47f0\4\u4844\4\u4898\4\u48ec\4\u4940\4\u4994\4\u49e8\4\u4a3c"+
-    "\4\u4a90\4\u4ae4\4\u4b38\4\u4b8c\4\u4be0\4\u4c34\4\u4c88\4\u4cdc"+
-    "\4\u4d30\4\u4d84\4\u4dd8\4\u4e2c\4\u4e80\4\u4ed4\4\u4f28\4\u4f7c"+
-    "\4\u4fd0\4\u5024\4\u5078\4\u50cc\4\u5120\4\u5174\4\u51c8\4\u521c"+
-    "\4\u5270\4\u52c4\4\u5318\4\u536c\4\u53c0\4\u5414\4\u5468\4\u54bc"+
-    "\4\u5510\4\u5564\4\u55b8\4\u560c\4\u5660\4\u56b4\4\u5708\4\u575c"+
-    "\4\u57b0\4\u5804\4\u5858\4\u58ac\4\u5900\4\u5954\4\u59a8\4\u59fc"+
-    "\4\u5a50\4\u5aa4\4\u5af8\4\u5b4c\4\u5ba0\4\u5bf4\4\u5c48\4\u5c9c"+
-    "\4\u5cf0\4\u5d44\4\u5d98\4\u5dec\4\u5e40\4\u5e94\4\u5ee8\4\u5f3c"+
-    "\4\u5f90\4\u5fe4\4\u6038\4\u608c\4\u60e0\4\u6134\4\u6188\4\u61dc"+
-    "\4\u6230\4\u6284\4\u62d8\4\u632c\4\u6380\4\u63d4\4\u6428\4\u647c"+
-    "\4\u64d0\4\u6524\4\u6578\4\u65cc\4\u6620\4\u6674\4\u66c8\4\u671c"+
-    "\4\u6770\4\u67c4\4\u6818\4\u686c\4\u68c0\4\u6914\4\u6968\4\u69bc"+
-    "\4\u6a10\4\u6a64\4\u6ab8\4\u6b0c\4\u6b60\4\u6bb4\4\u6c08\4\u6c5c"+
-    "\4\u6cb0\4\u6d04\4\u6d58\4\u6dac\4\u6e00\4\u6e54\4\u6ea8\4\u6efc"+
-    "\4\u6f50\4\u6fa4\4\u6ff8\4\u704c\4\u70a0\4\u70f4\4\u7148\4\u719c"+
-    "\4\u71f0\4\u7244\4\u7298\4\u72ec\0\u039c\0\u0ad4\0\u0c78\0\u19ec"+
-    "\0\u1a40\0\u1a94\0\u1ae8\0\u1b3c\0\u1b90\0\u1be4\0\u1c38\0\u1c8c"+
-    "\0\u1ce0\0\u1d34\0\u1d88\0\u1ddc\0\u1e30\0\u1e84\0\u1ed8\0\u1f2c"+
-    "\0\u1f80\0\u1fd4\0\u2028\0\u207c\0\u20d0\0\u2124\0\u2178\0\u21cc"+
-    "\0\u2220\4\u7340\4\u7394\4\u73e8\4\u743c\4\u7490\4\u74e4\4\u7538"+
-    "\4\u758c\4\u75e0\4\u7634\4\u7688\1\ud880\4\u76dc\4\u7730\4\u7784"+
-    "\4\u77d8\4\u782c\4\u7880\4\u78d4\4\u7928\4\u797c\4\u79d0\4\u7a24"+
-    "\1\uccb0\4\u7a78\4\u7acc\4\u7b20\4\u7b74\4\u7bc8\4\u7c1c\4\u7c70"+
-    "\4\u7cc4\4\u7d18\4\u7d6c\4\u7dc0\4\u7e14\4\u7e68\4\u7ebc\4\u7f10"+
-    "\4\u7f64\4\u7fb8\4\u800c\1\uaed4\4\u8060\2\u298c\1\ucdac\4\u80b4"+
-    "\4\u8108\4\u815c\4\u81b0\2\u2b30\4\u8204\4\u8258\4\u82ac\4\u8300"+
-    "\4\u8354\4\u83a8\4\u83fc\4\u8450\4\u84a4\4\u84f8\1\ua4fc\4\u854c"+
-    "\4\u85a0\4\u85f4\4\u8648\4\u869c\4\u86f0\4\u8744\4\u8798\4\u87ec"+
-    "\4\u8840\4\u8894\4\u88e8\4\u893c\4\u8990\4\u89e4\4\u8a38\4\u8a8c"+
-    "\4\u8ae0\4\u8b34\4\u8b88\4\u8bdc\4\u8c30\1\ue300\4\u8c84\4\u8cd8"+
-    "\4\u8d2c\4\u8d80\4\u8dd4\4\u8e28\4\u8e7c\4\u8ed0\4\u8f24\4\u8f78"+
-    "\4\u8fcc\4\u9020\4\u9074\4\u90c8\2\64\4\u911c\4\u9170\4\u91c4"+
-    "\4\u9218\4\u926c\4\u92c0\4\u9314\4\u9368\4\u93bc\4\u9410\4\u9464"+
-    "\4\u94b8\4\u950c\4\u9560\4\u95b4\1\ufcec\4\u9608\4\u965c\4\u96b0"+
-    "\4\u9704\4\u9758\4\u97ac\4\u9800\4\u9854\4\u98a8\4\u98fc\4\u9950"+
-    "\4\u99a4\4\u99f8\4\u9a4c\4\u9aa0\4\u9af4\4\u9b48\4\u9b9c\4\u9bf0"+
-    "\4\u9c44\4\u9c98\4\u9cec\4\u9d40\4\u9d94\4\u9de8\4\u9e3c\4\u9e90"+
-    "\1\uc674\2\u08bc\4\u9ee4\4\u9f38\4\u9f8c\4\u9fe0\4\ua034\0\u8f64"+
-    "\4\ua088\2\u07c0\4\ua0dc\4\ua130\4\ua184\4\ua1d8\1\ue108\4\ua22c"+
-    "\4\ua280\4\ua2d4\4\ua328\4\ua37c\4\ua3d0\4\ua424\4\ua478\4\ua4cc"+
-    "\4\ua520\0\u5010\4\ua574\4\ua5c8\4\ua61c\4\ua670\4\ua6c4\4\ua718"+
-    "\2\u2794\4\ua76c\4\ua7c0\4\ua814\4\ua868\1\uc4d0\4\ua8bc\4\ua910"+
-    "\4\ua964\4\ua9b8\4\uaa0c\4\uaa60\4\uaab4\4\uab08\4\uab5c\4\uabb0"+
-    "\4\uac04\4\uac58\4\uacac\4\uad00\4\uad54\4\uada8\4\uadfc\4\uae50"+
-    "\4\uaea4\4\uaef8\4\uaf4c\4\uafa0\4\uaff4\4\ub048\4\ub09c\4\ub0f0"+
-    "\4\ub144\1\ubb4c\4\ub198\4\ub1ec\4\ub240\1\uf218\1\ub1c8\4\ub294"+
-    "\4\ub2e8\4\ub33c\4\ub390\4\ub3e4\4\ub438\4\ub48c\4\ub4e0\4\ub534"+
-    "\4\ub588\4\ub5dc\4\ub630\4\ub684\4\ub6d8\4\ub72c\4\ub780\4\ub7d4"+
-    "\4\ub828\4\ub87c\4\ub8d0\4\ub924\4\ub978\1\ue6f0\4\ub9cc\4\uba20"+
-    "\4\uba74\4\ubac8\4\ubb1c\4\ubb70\4\ubbc4\4\ubc18\4\ubc6c\4\ubcc0"+
-    "\4\ubd14\4\ubd68\4\ubdbc\4\ube10\4\ube64\4\ubeb8\4\ubf0c\4\ubf60"+
-    "\4\ubfb4\4\uc008\4\uc05c\4\uc0b0\4\uc104\4\uc158\4\uc1ac\4\uc200"+
-    "\4\uc254\4\uc2a8\4\uc2fc\4\uc350\4\uc3a4\4\uc3f8\4\uc44c\4\uc4a0"+
-    "\4\uc4f4\4\uc548\4\uc59c\4\uc5f0\4\uc644\4\uc698\4\uc6ec\4\uc740"+
-    "\4\uc794\4\uc7e8\4\uc83c\4\uc890\4\uc8e4\4\uc938\4\uc98c\4\uc9e0"+
-    "\4\uca34\4\uca88\1\ue258\4\ucadc\4\ucb30\4\ucb84\4\ucbd8\4\ucc2c"+
-    "\4\ucc80\4\uccd4\4\ucd28\1\ucd04\4\ucd7c\4\ucdd0\4\uce24\4\uce78"+
-    "\4\ucecc\4\ucf20\4\uaf4c\4\ucf74\4\ucfc8\4\ud01c\0\u7914\4\ud070"+
-    "\4\ud0c4\4\ud118\4\ud16c\4\ud1c0\4\ud214\4\ud268\4\ud2bc\4\ud310"+
-    "\4\ud364\4\ud3b8\4\ud40c\4\ud460\4\ud4b4\4\ud508\4\ud55c\4\ud5b0"+
-    "\1\uafd0\4\ud604\4\ud658\4\ud6ac\4\ud700\4\ud754\4\ud7a8\4\ud7fc"+
-    "\4\ud850\4\ud8a4\4\ud8f8\4\ud94c\4\ud9a0\4\ud9f4\4\uda48\4\uda9c"+
-    "\4\udaf0\4\udb44\4\udb98\4\udbec\4\udc40\4\udc94\4\udce8\4\udd3c"+
-    "\4\udd90\4\udde4\4\ude38\4\ude8c\1\ufd40\4\udee0\4\udf34\4\udf88"+
-    "\4\udfdc\4\u8d80\4\ue030\4\ue084\4\ue0d8\4\ue12c\4\uab08\4\ue180"+
-    "\4\ue1d4\4\ue228\4\ue27c\4\ue2d0\1\ub414\4\ue324\4\ue378\4\ue3cc"+
-    "\4\ue420\4\ue474\4\ue4c8\4\ue51c\4\ue570\4\ue5c4\4\ue618\2\u1c6c"+
-    "\4\ue66c\4\ue6c0\4\ub588\4\ue714\4\ue768\4\ue7bc\4\ue810\4\ue864"+
-    "\4\ue8b8\4\ue90c\4\ue960\4\ue9b4\4\uea08\2\u1294\4\uea5c\4\ueab0"+
-    "\4\ueb04\4\ueb58\4\uebac\4\uec00\4\uec54\4\ueca8\0\u2514\0\u2568"+
-    "\0\u25bc\0\u2610\0\u2664\0\u26b8\0\u270c\0\u2760\0\u27b4\0\u2808"+
-    "\0\u285c\0\u28b0\0\u2904\0\u2958\0\u29ac\0\u2a00\0\u2a54\0\u2aa8"+
-    "\0\u2afc\0\u2b50\0\u2ba4\0\u2bf8\0\u2c4c\0\u2ca0\0\u2cf4\0\u2d48"+
-    "\4\uecfc\2\u7600\4\ued50\4\ueda4\4\uedf8\4\uee4c\4\ueea0\4\ueef4"+
-    "\4\uef48\4\uef9c\4\ueff0\4\uf044\4\uf098\2\u6a30\4\uf0ec\4\uf140"+
-    "\4\uf194\4\uf1e8\4\uf23c\4\uf290\4\uf2e4\4\uf338\4\uf38c\4\uf3e0"+
-    "\4\uf434\4\uf488\4\uf4dc\4\uf530\4\uf584\4\uf5d8\4\uf62c\4\uf680"+
-    "\2\u4c54\4\uf6d4\2\uc70c\2\u6b2c\4\uf728\4\uf77c\4\uf7d0\4\uf824"+
-    "\2\uc8b0\4\uf878\4\uf8cc\4\uf920\4\uf974\4\uf9c8\4\ufa1c\4\ufa70"+
-    "\4\ufac4\4\ufb18\4\ufb6c\2\u4378\4\ufbc0\4\ufc14\4\ufc68\4\ufcbc"+
-    "\4\ufd10\4\ufd64\4\ufdb8\4\ufe0c\4\ufe60\4\ufeb4\4\uff08\4\uff5c"+
-    "\4\uffb0\5\4\5\130\5\254\5\u0100\5\u0154\5\u01a8\5\u01fc"+
-    "\5\u0250\5\u02a4\2\u8080\5\u02f8\5\u034c\5\u03a0\5\u03f4\5\u0448"+
-    "\5\u049c\5\u04f0\5\u0544\5\u0598\5\u05ec\5\u0640\5\u0694\5\u06e8"+
-    "\5\u073c\2\u9db4\5\u0790\5\u07e4\5\u0838\5\u088c\5\u08e0\5\u0934"+
-    "\5\u0988\5\u09dc\5\u0a30\5\u0a84\5\u0ad8\5\u0b2c\5\u0b80\5\u0bd4"+
-    "\5\u0c28\2\u9a6c\5\u0c7c\5\u0cd0\5\u0d24\5\u0d78\5\u0dcc\5\u0e20"+
-    "\5\u0e74\5\u0ec8\5\u0f1c\5\u0f70\5\u0fc4\5\u1018\5\u106c\5\u10c0"+
-    "\5\u1114\5\u1168\5\u11bc\5\u1210\5\u1264\5\u12b8\5\u130c\5\u1360"+
-    "\5\u13b4\5\u1408\5\u145c\5\u14b0\5\u1504\2\u63f4\2\ua63c\5\u1558"+
-    "\5\u15ac\5\u1600\5\u1654\5\u16a8\0\ue3b8\5\u16fc\2\ua540\5\u1750"+
-    "\5\u17a4\5\u17f8\5\u184c\2\u7e88\5\u18a0\5\u18f4\5\u1948\5\u199c"+
-    "\5\u19f0\5\u1a44\5\u1a98\5\u1aec\5\u1b40\5\u1b94\0\ua464\5\u1be8"+
-    "\5\u1c3c\5\u1c90\5\u1ce4\5\u1d38\5\u1d8c\2\uc514\5\u1de0\5\u1e34"+
-    "\5\u1e88\5\u1edc\2\u6250\5\u1f30\5\u1f84\5\u1fd8\5\u202c\5\u2080"+
-    "\5\u20d4\5\u2128\5\u217c\5\u21d0\5\u2224\5\u2278\5\u22cc\5\u2320"+
-    "\5\u2374\5\u23c8\5\u241c\5\u2470\5\u24c4\5\u2518\5\u256c\5\u25c0"+
-    "\5\u2614\5\u2668\5\u26bc\5\u2710\5\u2764\5\u27b8\2\u58cc\5\u280c"+
-    "\5\u2860\5\u28b4\2\u8f98\2\u4f48\5\u2908\5\u295c\5\u29b0\5\u2a04"+
-    "\5\u2a58\5\u2aac\5\u2b00\5\u2b54\5\u2ba8\5\u2bfc\5\u2c50\5\u2ca4"+
-    "\5\u2cf8\5\u2d4c\5\u2da0\5\u2df4\5\u2e48\5\u2e9c\5\u2ef0\5\u2f44"+
-    "\5\u2f98\5\u2fec\2\u8470\5\u3040\5\u3094\5\u30e8\5\u313c\5\u3190"+
-    "\5\u31e4\5\u3238\5\u328c\5\u32e0\5\u3334\5\u3388\5\u33dc\5\u3430"+
-    "\5\u3484\5\u34d8\5\u352c\5\u3580\5\u35d4\5\u3628\5\u367c\5\u36d0"+
-    "\5\u3724\5\u3778\5\u37cc\5\u3820\5\u3874\5\u38c8\5\u391c\5\u3970"+
-    "\5\u39c4\5\u3a18\5\u3a6c\5\u3ac0\5\u3b14\5\u3b68\5\u3bbc\5\u3c10"+
-    "\5\u3c64\5\u3cb8\5\u3d0c\5\u3d60\5\u3db4\5\u3e08\5\u3e5c\5\u3eb0"+
-    "\5\u3f04\5\u3f58\5\u3fac\5\u4000\5\u4054\5\u40a8\5\u40fc\2\u7fd8"+
-    "\5\u4150\5\u41a4\5\u41f8\5\u424c\5\u42a0\5\u42f4\5\u4348\5\u439c"+
-    "\2\u6a84\5\u43f0\5\u4444\5\u4498\5\u44ec\5\u4540\5\u4594\5\u25c0"+
-    "\5\u45e8\5\u463c\5\u4690\0\ucd68\5\u46e4\5\u4738\5\u478c\5\u47e0"+
-    "\5\u4834\5\u4888\5\u48dc\5\u4930\5\u4984\5\u49d8\5\u4a2c\5\u4a80"+
-    "\5\u4ad4\5\u4b28\5\u4b7c\5\u4bd0\5\u4c24\2\u4d50\5\u4c78\5\u4ccc"+
-    "\5\u4d20\5\u4d74\5\u4dc8\5\u4e1c\5\u4e70\5\u4ec4\5\u4f18\5\u4f6c"+
-    "\5\u4fc0\5\u5014\5\u5068\5\u50bc\5\u5110\5\u5164\5\u51b8\5\u520c"+
-    "\5\u5260\5\u52b4\5\u5308\5\u535c\5\u53b0\5\u5404\5\u5458\5\u54ac"+
-    "\5\u5500\2\u9ac0\5\u5554\5\u55a8\5\u55fc\5\u5650\5\u03f4\5\u56a4"+
-    "\5\u56f8\5\u574c\5\u57a0\5\u217c\5\u57f4\5\u5848\5\u589c\5\u58f0"+
-    "\5\u5944\2\u5194\5\u5998\5\u59ec\5\u5a40\5\u5a94\5\u5ae8\5\u5b3c"+
-    "\5\u5b90\5\u5be4\5\u5c38\5\u5c8c\2\ub9ec\5\u5ce0\5\u2bfc\5\u5d34"+
-    "\5\u5d88\5\u5ddc\5\u5e30\5\u5e84\5\u5ed8\5\u5f2c\5\u5f80\5\u5fd4"+
-    "\5\u6028\2\ub014\5\u607c\5\u60d0\5\u6124\5\u6178\5\u61cc\5\u6220"+
-    "\5\u6274\5\u62c8\5\u631c\5\u6370\5\u63c4\5\u6418\5\u646c\5\u64c0"+
-    "\5\u6514\5\u6568\5\u65bc\5\u6610\5\u6664\5\u66b8\5\u670c\5\u6760"+
-    "\5\u67b4\5\u6808\5\u685c\5\u68b0\5\u6904\5\u6958\5\u69ac\5\u6a00"+
-    "\5\u6a54\5\u6aa8\5\u6afc\5\u6b50\5\u6ba4\5\u6bf8\5\u6c4c\5\u6ca0"+
-    "\5\u6cf4\5\u6d48\5\u6d9c\5\u6df0\5\u6e44\5\u6e98\5\u6eec\5\u6f40"+
-    "\5\u6f94\5\u6fe8\5\u703c\5\u7090\5\u70e4\5\u7138\5\u718c\5\u71e0"+
-    "\5\u7234\5\u7288\5\u72dc\5\u7330\5\u7384\5\u73d8\5\u742c\5\u7480"+
-    "\5\u74d4\5\u7528\5\u757c\5\u75d0\5\u7624\5\u7678\5\u76cc\5\u7720"+
-    "\5\u7774\5\u77c8\5\u781c\5\u7870\5\u78c4\5\u7918\5\u796c\5\u79c0"+
-    "\5\u7a14\5\u7a68\5\u7abc\5\u7b10\5\u7b64\5\u7bb8\5\u7c0c\5\u7c60"+
-    "\5\u7cb4\5\u7d08\5\u7d5c\5\u7db0\5\u7e04\5\u7e58\5\u7eac\5\u7f00"+
-    "\5\u7f54\5\u7fa8\5\u7ffc\5\u8050\5\u80a4\5\u80f8\5\u814c\5\u81a0"+
-    "\5\u81f4\5\u8248\5\u829c\5\u82f0\5\u8344\5\u8398\5\u83ec\5\u8440"+
-    "\5\u8494\5\u84e8\5\u853c\5\u8590\5\u85e4\5\u8638\5\u868c\5\u86e0"+
-    "\5\u8734\5\u8788\5\u87dc\5\u8830\5\u8884\5\u88d8\5\u892c\5\u8980"+
-    "\5\u89d4\5\u8a28\5\u8a7c\5\u8ad0\5\u8b24\5\u8b78\5\u8bcc\5\u8c20"+
-    "\5\u8c74\5\u8cc8\5\u8d1c\5\u8d70\5\u8dc4\5\u8e18\5\u8e6c\5\u8ec0"+
-    "\5\u8f14\5\u8f68\5\u8fbc\5\u9010\5\u9064\5\u90b8\5\u910c\5\u9160"+
-    "\5\u91b4\5\u9208\5\u925c\5\u92b0\5\u9304\5\u9358\5\u93ac\5\u9400"+
-    "\5\u9454\5\u94a8\5\u94fc\5\u9550\5\u95a4\5\u95f8\5\u964c\5\u96a0"+
-    "\5\u96f4\5\u9748\5\u979c\5\u97f0\5\u9844\5\u9898\5\u98ec\5\u9940"+
-    "\5\u9994\5\u99e8\5\u9a3c\5\u9a90\5\u9ae4\5\u9b38\5\u9b8c\5\u9be0"+
-    "\5\u9c34\5\u9c88\5\u9cdc\5\u9d30\5\u9d84\5\u9dd8\5\u9e2c\5\u9e80"+
-    "\5\u9ed4\5\u9f28\5\u9f7c\5\u9fd0\5\ua024\5\ua078\5\ua0cc\5\ua120"+
-    "\5\ua174\5\ua1c8\5\ua21c\5\ua270\5\ua2c4\5\ua318\5\ua36c\5\ua3c0"+
-    "\5\ua414\5\ua468\5\ua4bc\5\ua510\5\ua564\5\ua5b8\5\ua60c\5\ua660"+
-    "\5\ua6b4\5\ua708\5\ua75c\5\ua7b0\5\ua804\5\ua858\5\ua8ac\5\ua900"+
-    "\5\ua954\5\ua9a8\5\ua9fc\5\uaa50\5\uaaa4\5\uaaf8\5\uab4c\5\uaba0"+
-    "\5\uabf4\5\uac48\5\uac9c\5\uacf0\5\uad44\5\uad98\5\uadec\5\uae40"+
-    "\5\uae94\5\uaee8\5\uaf3c\5\uaf90\5\uafe4\5\ub038\5\ub08c\5\ub0e0"+
-    "\5\ub134\5\ub188\5\ub1dc\5\ub230\5\ub284\5\ub2d8\5\ub32c\5\ub380"+
-    "\5\ub3d4\5\ub428\5\ub47c\5\ub4d0\5\ub524\5\ub578\5\ub5cc\5\ub620"+
-    "\5\ub674\5\ub6c8\5\ub71c\5\ub770\5\ub7c4\5\ub818\5\ub86c\5\ub8c0"+
-    "\5\ub914\5\ub968\5\ub9bc\5\uba10\5\uba64\5\ubab8\5\ubb0c\5\ubb60"+
-    "\5\ubbb4\5\ubc08\5\ubc5c\5\ubcb0\5\ubd04\5\ubd58\5\ubdac\5\ube00"+
-    "\5\ube54\5\ubea8\5\ubefc\5\ubf50\5\ubfa4\5\ubff8\5\uc04c\5\uc0a0"+
-    "\5\uc0f4\5\uc148\5\uc19c\5\uc1f0\5\uc244\5\uc298\5\uc2ec\5\uc340"+
-    "\5\uc394\5\uc3e8\5\uc43c\5\uc490\5\uc4e4\5\uc538\5\uc58c\5\uc5e0"+
-    "\5\uc634\5\uc688\5\uc6dc\5\uc730\5\uc784\5\uc7d8\5\uc82c\5\uc880"+
-    "\5\uc8d4\5\uc928\5\uc97c\5\uc9d0\5\uca24\5\uca78\5\ucacc\5\ucb20"+
-    "\5\ucb74\5\ucbc8\5\ucc1c\5\ucc70\5\uccc4\5\ucd18\5\ucd6c\5\ucdc0"+
-    "\5\uce14\5\uce68\5\ucebc\5\ucf10\5\ucf64\5\ucfb8\5\ud00c\5\ud060"+
-    "\5\ud0b4\5\ud108\5\ud15c\5\ud1b0\5\ud204\5\ud258\5\ud2ac\5\ud300"+
-    "\5\ud354\5\ud3a8\5\ud3fc\5\ud450\5\ud4a4\5\ud4f8\5\ud54c\5\ud5a0"+
-    "\5\ud5f4\5\ud648\5\ud69c\5\ud6f0\5\ud744\5\ud798\5\ud7ec\5\ud840"+
-    "\5\ud894\5\ud8e8\5\ud93c\5\ud990\5\ud9e4\5\uda38\5\uda8c\5\udae0"+
-    "\5\udb34\5\udb88\5\udbdc\5\udc30\5\udc84\5\udcd8\5\udd2c\5\udd80"+
-    "\5\uddd4\5\ude28\5\ude7c\5\uded0\5\udf24\5\udf78\5\udfcc\5\ue020"+
-    "\5\ue074\5\ue0c8\5\ue11c\5\ue170\5\ue1c4\5\ue218\5\ue26c\5\ue2c0"+
-    "\5\ue314\5\ue368\5\ue3bc\5\ue410\5\ue464\5\ue4b8\5\ue50c\5\ue560"+
-    "\5\ue5b4\5\ue608\5\ue65c\5\ue6b0\5\ue704\5\ue758\5\ue7ac\5\ue800"+
-    "\5\ue854\5\ue8a8\5\ue8fc\5\ue950\5\ue9a4\5\ue9f8\5\uea4c\5\ueaa0"+
-    "\5\ueaf4\5\ueb48\5\ueb9c\5\uebf0\5\uec44\5\uec98\5\uecec\5\ued40"+
-    "\5\ued94\5\uede8\5\uee3c\5\uee90\5\ueee4\5\uef38\5\uef8c\5\uefe0"+
-    "\5\uf034\5\uf088\5\uf0dc\5\uf130\5\uf184\5\uf1d8\5\uf22c\5\uf280"+
-    "\5\uf2d4\5\uf328\5\uf37c\5\uf3d0\5\uf424\5\uf478\5\uf4cc\5\uf520"+
-    "\5\uf574\5\uf5c8\5\uf61c\5\uf670\5\uf6c4\5\uf718\5\uf76c\5\uf7c0"+
-    "\5\uf814\5\uf868\5\uf8bc\5\uf910\5\uf964\5\uf9b8\5\ufa0c\5\ufa60"+
-    "\5\ufab4\5\ufb08\5\ufb5c\5\ufbb0\5\ufc04\5\ufc58\5\ufcac\5\ufd00"+
-    "\5\ufd54\5\ufda8\5\ufdfc\5\ufe50\5\ufea4\5\ufef8\5\uff4c\5\uffa0"+
-    "\5\ufff4\6\110\6\234\6\360\6\u0144\6\u0198\6\u01ec\6\u0240"+
-    "\6\u0294\6\u02e8\6\u033c\6\u0390\6\u03e4\6\u0438\6\u048c\6\u04e0"+
-    "\6\u0534\6\u0588\6\u05dc\6\u0630\6\u0684\6\u06d8\6\u072c\6\u0780"+
-    "\6\u07d4\6\u0828\6\u087c\6\u08d0\6\u0924\6\u0978\6\u09cc\6\u0a20"+
-    "\6\u0a74\6\u0ac8\6\u0b1c\6\u0b70\6\u0bc4\6\u0c18\6\u0c6c\6\u0cc0"+
-    "\6\u0d14\6\u0d68\6\u0dbc\6\u0e10\6\u0e64\6\u0eb8\6\u0f0c\6\u0f60"+
-    "\6\u0fb4\6\u1008\6\u105c\6\u10b0\6\u1104\6\u1158\6\u11ac\6\u1200"+
-    "\6\u1254\6\u12a8\6\u12fc\6\u1350\6\u13a4\6\u13f8\6\u144c\6\u14a0"+
-    "\6\u14f4\6\u1548\6\u159c\6\u15f0\6\u1644\6\u1698\6\u16ec\6\u1740"+
-    "\6\u1794\6\u17e8\6\u183c\6\u1890\6\u18e4\6\u1938\6\u198c\6\u19e0"+
-    "\6\u1a34\6\u1a88\6\u1adc\6\u1b30\6\u1b84\6\u1bd8\6\u1c2c\6\u1c80"+
-    "\6\u1cd4\6\u1d28\6\u1d7c\6\u1dd0\6\u1e24\6\u1e78\6\u1ecc\6\u1f20"+
-    "\6\u1f74\6\u1fc8\6\u201c\6\u2070\6\u20c4\6\u2118\6\u216c\6\u21c0"+
-    "\6\u2214\6\u2268\6\u22bc\6\u2310\6\u2364\6\u23b8\6\u240c\6\u2460"+
-    "\6\u24b4\6\u2508\6\u255c\6\u25b0\6\u2604\6\u2658\6\u26ac\6\u2700"+
-    "\6\u2754\6\u27a8\6\u27fc\6\u2850\6\u28a4\6\u28f8\6\u294c\6\u29a0"+
-    "\6\u29f4\6\u2a48\6\u2a9c\6\u2af0\6\u2b44\6\u2b98\6\u2bec\6\u2c40"+
-    "\6\u2c94\6\u2ce8\6\u2d3c\6\u2d90\6\u2de4\6\u2e38\6\u2e8c\6\u2ee0"+
-    "\6\u2f34\6\u2f88\6\u2fdc\6\u3030\6\u3084\6\u30d8\6\u312c\6\u3180"+
-    "\6\u31d4\6\u3228\6\u327c\6\u32d0\6\u3324\6\u3378\6\u33cc\6\u3420"+
-    "\6\u3474\6\u34c8\6\u351c\6\u3570\6\u35c4\6\u3618\6\u366c\6\u36c0"+
-    "\6\u3714\6\u3768\6\u37bc\6\u3810\6\u3864\6\u38b8\6\u390c\6\u3960"+
-    "\6\u39b4\6\u3a08\6\u3a5c\6\u3ab0\6\u3b04\6\u3b58\6\u3bac\6\u3c00"+
-    "\6\u3c54\6\u3ca8\6\u3cfc\6\u3d50\6\u3da4\6\u3df8\6\u3e4c\6\u3ea0"+
-    "\6\u3ef4\6\u3f48\6\u3f9c\6\u3ff0\6\u4044\6\u4098\6\u40ec\6\u4140"+
-    "\6\u4194\6\u41e8\6\u423c\6\u4290\6\u42e4\6\u4338\6\u438c\6\u43e0"+
-    "\6\u4434\6\u4488\6\u44dc\6\u4530\6\u4584\6\u45d8\6\u462c\6\u4680"+
-    "\6\u46d4\6\u4728\6\u477c\6\u47d0\6\u4824\6\u4878\6\u48cc\6\u4920"+
-    "\

<TRUNCATED>

[17/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
new file mode 100644
index 0000000..446253d
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
@@ -0,0 +1,150 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+# Split this script's path so the generated Java file can be written next to it.
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+# The version must be exactly X.Y: it is embedded in the URL and the class name.
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\A\d+\.\d+\z/) {
+    print STDERR "Usage: $script_name -v <version>\n";
+    print STDERR "\tversion must be of the form X.Y, e.g. 11.0\n" if ($version);
+    exit 1;
+}
+my $url = "http://www.unicode.org/Public/emoji/${version}/emoji-test.txt";
+(my $underscore_version = $version) =~ s/\./_/g;
+my $class_name = "EmojiTokenizationTestUnicode_${underscore_version}";
+my $output_filename = "${class_name}.java";
+# Java preamble written ahead of the test data; interpolates $script_name, $url and $class_name.
+my $header =<<"__HEADER__";
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by ${script_name}
+ * from: ${url}
+ *
+ * emoji-test.txt contains emoji char sequences, which are represented as
+ * tokenization tests in this class.
+ * 
+ */
+\@Ignore
+public class ${class_name} extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+    for (int i = 0 ; i < tests.length ; i += 2) {
+      String test = tests[i + 1];
+      try {
+        assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
+      } catch (Throwable t) {
+        throw new Exception("Failed to tokenize \\"" + tests[i] + "\\":", t);        
+      }
+    }
+  }
+
+  private String[] tests = new String[] {
+__HEADER__
+# Download the emoji test data; every non-blank, non-comment line becomes one test case.
+my @tests = split /\r?\n/, get_URL_content($url);
+
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+# Three-arg open with low-precedence 'or': written with '||', the die was bound to
+# the (always true) filename string, so a failed open was never reported.
+open(my $out, '>', $output_path)
+    or die "Error opening '$output_path' for writing: $!";
+print STDERR "Writing '$output_path'...";
+print {$out} $header;
+
+my $isFirst = 1;
+for my $line (@tests) {
+    next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
+    print {$out} ",\n\n" unless $isFirst; # Separator goes before every entry but the first
+    $isFirst = 0;
+
+    # Example line: 1F46E 1F3FB 200D 2642 FE0F                 ; fully-qualified     # 👮🏻‍♂️ man police officer: light skin tone
+    $line =~ s/\s+$//;     # Trim trailing whitespace
+    $line =~ s/\t/  /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
+    print {$out} "    \"$line\",\n"; # Even array slot: the data line itself, quoted in failure messages
+    my ($test_string) = $line =~ /^(.*?)\s*;/; # The code points are everything before the first ';'
+    $test_string =~ s/([0-9A-F]+)/\\u$1/g; # Prefix each hex code point with \u
+    # Code points with five or more hex digits are rewritten as a pair of four-digit surrogate escapes
+    $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
+    $test_string =~ s/\s//g;
+    print {$out} "    \"${test_string}\""; # Odd array slot: the escaped string that gets tokenized
+}
+print {$out} "  };\n}\n";
+close($out) or die "Error closing '$output_path': $!"; # Buffered write errors surface at close
+print STDERR "done.\n";
+
+
+# sub above_BMP_char_to_surrogates
+#
+# Given the hex string of a char above the BMP (i.e., greater than 0xFFFF),
+# returns its UTF-16 high and low surrogates as 4-digit uppercase hex strings.
+#
+# Assumption: input string is a sequence of more than four hex digits
+#
+sub above_BMP_char_to_surrogates {
+    my ($hex_digits) = @_;
+    my $offset = hex($hex_digits) - 0x10000; # Distance past the end of the BMP
+    my @surrogates = (0xD800 + ($offset >> 10), 0xDC00 + ($offset & 0x3FF));
+    return map { sprintf("%04X", $_) } @surrogates;
+}
+
+
+# sub get_URL_content
+#
+# Issues an HTTP GET for the given URL and returns the content of the
+# response, reporting progress on STDERR.  When the response is not
+# successful, prints the status line and exits the script with status 1.
+#
+sub get_URL_content {
+    my ($url) = @_;
+    print STDERR "Retrieving '$url'...";
+    my $response = LWP::UserAgent->new->request(HTTP::Request->new(GET => $url));
+    if ($response->is_success) {
+        print STDERR "done.\n";
+        return $response->content;
+    }
+    print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
+    exit 1;
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
index 3004035..dd16cb6 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
@@ -40,8 +40,6 @@ $underscore_version =~ s/\./_/g;
 my $class_name = "WordBreakTestUnicode_${underscore_version}";
 my $output_filename = "${class_name}.java";
 my $header =<<"__HEADER__";
-package org.apache.lucene.analysis;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -59,6 +57,8 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+package org.apache.lucene.analysis.standard;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.junit.Ignore;
@@ -81,7 +81,7 @@ import org.junit.Ignore;
  *    \\p{WordBreak = Hebrew_Letter}
  *    \\p{WordBreak = Katakana}
  *    \\p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
- *    [\\uFF10-\\uFF19]                (Full-width Arabic digits)
+ *    [\\uFF10-\\uFF19]                 (Full-width Arabic digits)
  */
 \@Ignore
 public class ${class_name} extends BaseTokenStreamTestCase {
@@ -91,6 +91,7 @@ __HEADER__
 
 my $codepoints = [];
 map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
+my $regional_indicator_codepoints = [];
 # Complex_Context is an alias for 'SA', which is used in LineBreak.txt
 # Using lowercase versions of property value names to allow for case-
 # insensitive comparison with the names in the Unicode data files.
@@ -98,7 +99,9 @@ parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
 parse_Unicode_data_file($scripts_url, $codepoints, 
                         {'han' => 1, 'hiragana' => 1});
 parse_Unicode_data_file($word_break_url, $codepoints,
-                        {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
+                        {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1, 'e_base' => 1,
+                         'e_modifier' => 1, 'glue_after_zwj' => 1, 'e_base_gaz' => 1});
+parse_Unicode_data_file($word_break_url, $regional_indicator_codepoints, {'regional_indicator' => 1});
 my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
 
 my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
@@ -124,10 +127,21 @@ for my $line (@tests) {
   $test_string =~ s/\\u000D/\\r/g;
   $test_string =~ s/\\u0022/\\\"/g;
   $sequence =~ s/^\s*รท\s*//; # Trim leading break character
+  
+  # TODO: When upgrading JFlex to a version that supports Unicode 11.0+: remove the special case below for a Unicode 9.0 test data line that conflicts with TR#51 11.0 test data
+  # ÷ 200D ÷ 261D ÷  #  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+  if ($sequence =~ /^200D\s*รท\s*261D$/) {
+    print OUT "    // Skipping this test because it conflicts with TR#51 v11.0 rules.\n\n";
+    next;
+  }
+  
   my @tokens = ();
+  my $isfirst = 0;
   for my $candidate (split /\s*รท\s*/, $sequence) {
+    $isfirst = 1;
     my @chars = ();
-    my $has_wanted_char = 0;
+    my $has_wanted_chars = 0;
+    my $prev_char_regional_indicator = 0;
     while ($candidate =~ /([0-9A-F]+)/gi) {
       my $hexchar = $1;
       if (4 == length($hexchar)) {
@@ -135,12 +149,21 @@ for my $line (@tests) {
       } else {
         push @chars, above_BMP_char_to_surrogates($hexchar);
       }
-      unless ($has_wanted_char) {
-        $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
+      unless ($has_wanted_chars) {
+        my $codepoint = hex($hexchar);
+        if (defined($codepoints->[$codepoint])) {
+          $has_wanted_chars = 1;
+        } elsif (defined($regional_indicator_codepoints->[$codepoint])) {
+          if (1 == $prev_char_regional_indicator) {
+            $has_wanted_chars = 1; # must be 2 regional indicators in a row
+          } else {
+            $prev_char_regional_indicator = 1;
+          }
+        }
       }
     }
-    if ($has_wanted_char) {
-      push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
+    if ($has_wanted_chars) {
+      push @tokens, '"'.join('', map { $_ eq "0022" ? "\\\"" : "\\u$_" } @chars).'"';
     }
   }
   print OUT "    assertAnalyzesTo(analyzer, \"${test_string}\",\n";


[13/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
index 8b288c2..a2ad394 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
@@ -65,147 +66,212 @@ public final class StandardTokenizerImpl {
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\42\0\1\15\4\0\1\14\4\0\1\7\1\0\1\10\1\0\12\4"+
-    "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\57\0\1\1"+
-    "\2\0\1\3\7\0\1\1\1\0\1\6\2\0\1\1\5\0\27\1"+
-    "\1\0\37\1\1\0\u01ca\1\4\0\14\1\5\0\1\6\10\0\5\1"+
-    "\7\0\1\1\1\0\1\1\21\0\160\3\5\1\1\0\2\1\2\0"+
-    "\4\1\1\7\7\0\1\1\1\6\3\1\1\0\1\1\1\0\24\1"+
-    "\1\0\123\1\1\0\213\1\1\0\7\3\236\1\11\0\46\1\2\0"+
-    "\1\1\7\0\47\1\1\0\1\7\7\0\55\3\1\0\1\3\1\0"+
-    "\2\3\1\0\2\3\1\0\1\3\10\0\33\16\5\0\3\16\1\1"+
-    "\1\6\13\0\5\3\7\0\2\7\2\0\13\3\1\0\1\3\3\0"+
-    "\53\1\25\3\12\4\1\0\1\4\1\7\1\0\2\1\1\3\143\1"+
-    "\1\0\1\1\10\3\1\0\6\3\2\1\2\3\1\0\4\3\2\1"+
-    "\12\4\3\1\2\0\1\1\17\0\1\3\1\1\1\3\36\1\33\3"+
-    "\2\0\131\1\13\3\1\1\16\0\12\4\41\1\11\3\2\1\2\0"+
-    "\1\7\1\0\1\1\5\0\26\1\4\3\1\1\11\3\1\1\3\3"+
-    "\1\1\5\3\22\0\31\1\3\3\104\0\1\1\1\0\13\1\67\0"+
-    "\33\3\1\0\4\3\66\1\3\3\1\1\22\3\1\1\7\3\12\1"+
-    "\2\3\2\0\12\4\1\0\7\1\1\0\7\1\1\0\3\3\1\0"+
-    "\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0"+
-    "\4\1\2\0\1\3\1\1\7\3\2\0\2\3\2\0\3\3\1\1"+
-    "\10\0\1\3\4\0\2\1\1\0\3\1\2\3\2\0\12\4\2\1"+
-    "\17\0\3\3\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1"+
-    "\1\0\2\1\1\0\2\1\1\0\2\1\2\0\1\3\1\0\5\3"+
-    "\4\0\2\3\2\0\3\3\3\0\1\3\7\0\4\1\1\0\1\1"+
-    "\7\0\12\4\2\3\3\1\1\3\13\0\3\3\1\0\11\1\1\0"+
-    "\3\1\1\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0"+
-    "\1\3\1\1\10\3\1\0\3\3\1\0\3\3\2\0\1\1\17\0"+
-    "\2\1\2\3\2\0\12\4\21\0\3\3\1\0\10\1\2\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\3"+
-    "\1\1\7\3\2\0\2\3\2\0\3\3\10\0\2\3\4\0\2\1"+
-    "\1\0\3\1\2\3\2\0\12\4\1\0\1\1\20\0\1\3\1\1"+
-    "\1\0\6\1\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1"+
-    "\1\0\2\1\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\3"+
-    "\3\0\3\3\1\0\4\3\2\0\1\1\6\0\1\3\16\0\12\4"+
-    "\21\0\3\3\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1"+
-    "\1\0\5\1\3\0\1\1\7\3\1\0\3\3\1\0\4\3\7\0"+
-    "\2\3\1\0\2\1\6\0\2\1\2\3\2\0\12\4\22\0\2\3"+
-    "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+
-    "\2\0\1\3\1\1\7\3\1\0\3\3\1\0\4\3\7\0\2\3"+
-    "\7\0\1\1\1\0\2\1\2\3\2\0\12\4\1\0\2\1\17\0"+
-    "\2\3\1\0\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\3"+
-    "\1\0\3\3\1\0\4\3\1\1\10\0\1\3\10\0\2\1\2\3"+
-    "\2\0\12\4\12\0\6\1\2\0\2\3\1\0\22\1\3\0\30\1"+
-    "\1\0\11\1\1\0\1\1\2\0\7\1\3\0\1\3\4\0\6\3"+
-    "\1\0\1\3\1\0\10\3\22\0\2\3\15\0\60\20\1\21\2\20"+
-    "\7\21\5\0\7\20\10\21\1\0\12\4\47\0\2\20\1\0\1\20"+
-    "\2\0\2\20\1\0\1\20\2\0\1\20\6\0\4\20\1\0\7\20"+
-    "\1\0\3\20\1\0\1\20\1\0\1\20\2\0\2\20\1\0\4\20"+
-    "\1\21\2\20\6\21\1\0\2\21\1\20\2\0\5\20\1\0\1\20"+
-    "\1\0\6\21\2\0\12\4\2\0\4\20\40\0\1\1\27\0\2\3"+
-    "\6\0\12\4\13\0\1\3\1\0\1\3\1\0\1\3\4\0\2\3"+
-    "\10\1\1\0\44\1\4\0\24\3\1\0\2\3\5\1\13\3\1\0"+
-    "\44\3\11\0\1\3\71\0\53\20\24\21\1\20\12\4\6\0\6\20"+
-    "\4\21\4\20\3\21\1\20\3\21\2\20\7\21\3\20\4\21\15\20"+
-    "\14\21\1\20\1\21\12\4\4\21\2\20\46\1\1\0\1\1\5\0"+
-    "\1\1\2\0\53\1\1\0\4\1\u0100\2\111\1\1\0\4\1\2\0"+
-    "\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0\4\1\2\0"+
-    "\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+
-    "\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0\3\3\40\0"+
-    "\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0\32\1\5\0"+
-    "\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\3\13\0\22\1"+
-    "\3\3\13\0\22\1\2\3\14\0\15\1\1\0\3\1\1\0\2\3"+
-    "\14\0\64\20\40\21\3\0\1\20\4\0\1\20\1\21\2\0\12\4"+
-    "\41\0\4\3\1\0\12\4\6\0\130\1\10\0\51\1\1\3\1\1"+
-    "\5\0\106\1\12\0\35\1\3\0\14\3\4\0\14\3\12\0\12\4"+
-    "\36\20\2\0\5\20\13\0\54\20\4\0\21\21\7\20\2\21\6\0"+
-    "\12\4\1\20\3\0\2\20\40\0\27\1\5\3\4\0\65\20\12\21"+
-    "\1\0\35\21\2\0\1\3\12\4\6\0\12\4\6\0\16\20\122\0"+
-    "\5\3\57\1\21\3\7\1\4\0\12\4\21\0\11\3\14\0\3\3"+
-    "\36\1\15\3\2\1\12\4\54\1\16\3\14\0\44\1\24\3\10\0"+
-    "\12\4\3\0\3\1\12\4\44\1\122\0\3\3\1\0\25\3\4\1"+
-    "\1\3\4\1\3\3\2\1\11\0\300\1\47\3\25\0\4\3\u0116\1"+
-    "\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1"+
-    "\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1"+
-    "\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1"+
-    "\4\0\15\1\5\0\3\1\1\0\7\1\17\0\4\3\10\0\2\10"+
-    "\12\0\1\10\2\0\1\6\2\0\5\3\20\0\2\11\3\0\1\7"+
-    "\17\0\1\11\13\0\5\3\1\0\12\3\1\0\1\1\15\0\1\1"+
-    "\20\0\15\1\63\0\41\3\21\0\1\1\4\0\1\1\2\0\12\1"+
-    "\1\0\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1"+
-    "\1\0\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1"+
-    "\21\0\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1"+
-    "\6\0\4\1\3\3\2\1\14\0\46\1\1\0\1\1\5\0\1\1"+
-    "\2\0\70\1\7\0\1\1\17\0\1\3\27\1\11\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\40\3\57\0\1\1\120\0\32\12\1\0"+
-    "\131\12\14\0\326\12\57\0\1\1\1\0\1\12\31\0\11\12\6\3"+
-    "\1\0\5\5\2\0\3\12\1\1\1\1\4\0\126\13\2\0\2\3"+
-    "\2\5\3\13\133\5\1\0\4\5\5\0\51\1\3\0\136\2\21\0"+
-    "\33\1\65\0\20\5\320\0\57\5\1\0\130\5\250\0\u19b6\12\112\0"+
-    "\u51cd\12\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\4"+
-    "\2\1\24\0\57\1\4\3\1\0\12\3\1\0\31\1\7\0\1\3"+
-    "\120\1\2\3\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
-    "\14\0\13\1\115\0\12\1\1\3\3\1\1\3\4\1\1\3\27\1"+
-    "\5\3\30\0\64\1\14\0\2\3\62\1\21\3\13\0\12\4\6\0"+
-    "\22\3\6\1\3\0\1\1\4\0\12\4\34\1\10\3\2\0\27\1"+
-    "\15\3\14\0\35\2\3\0\4\3\57\1\16\3\16\0\1\1\12\4"+
-    "\46\0\51\1\16\3\11\0\3\1\1\3\10\1\2\3\2\0\12\4"+
-    "\6\0\33\20\1\21\4\0\60\20\1\21\1\20\3\21\2\20\2\21"+
-    "\5\20\2\21\1\20\1\21\1\20\30\0\5\20\13\1\5\3\2\0"+
-    "\3\1\2\3\12\0\6\1\2\0\6\1\2\0\6\1\11\0\7\1"+
-    "\1\0\7\1\221\0\43\1\10\3\1\0\2\3\2\0\12\4\6\0"+
-    "\u2ba4\2\14\0\27\2\4\0\61\2\u2104\0\u016e\12\2\0\152\12\46\0"+
-    "\7\1\14\0\5\1\5\0\1\16\1\3\12\16\1\0\15\16\1\0"+
-    "\5\16\1\0\1\16\1\0\2\16\1\0\2\16\1\0\12\16\142\1"+
-    "\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\3"+
-    "\1\7\2\0\1\6\1\7\13\0\7\3\14\0\2\11\30\0\3\11"+
-    "\1\7\1\0\1\10\1\0\1\7\1\6\32\0\5\1\1\0\207\1"+
-    "\2\0\1\3\7\0\1\10\4\0\1\7\1\0\1\10\1\0\12\4"+
-    "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\13\0\70\5"+
-    "\2\3\37\2\3\0\6\2\2\0\6\2\2\0\6\2\2\0\3\2"+
-    "\34\0\3\3\4\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1"+
-    "\1\0\17\1\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\3"+
-    "\202\0\35\1\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1"+
-    "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\4"+
-    "\u0356\0\6\1\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1"+
-    "\2\0\27\1\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1"+
-    "\100\0\1\1\3\3\1\0\2\3\5\0\4\3\4\1\1\0\3\1"+
-    "\1\0\33\1\4\0\3\3\4\0\1\3\40\0\35\1\203\0\66\1"+
-    "\12\0\26\1\12\0\23\1\215\0\111\1\u03b7\0\3\3\65\1\17\3"+
-    "\37\0\12\4\20\0\3\3\55\1\13\3\2\0\1\3\22\0\31\1"+
-    "\7\0\12\4\6\0\3\3\44\1\16\3\1\0\12\4\100\0\3\3"+
-    "\60\1\16\3\4\1\13\0\12\4\u04a6\0\53\1\15\3\10\0\12\4"+
-    "\u0936\0\u036f\1\221\0\143\1\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1"+
-    "\13\0\1\1\56\3\20\0\4\3\15\1\u4060\0\1\5\1\13\u2163\0"+
-    "\5\3\3\0\26\3\2\0\7\3\36\0\4\3\224\0\3\3\u01bb\0"+
-    "\125\1\1\0\107\1\1\0\2\1\2\0\1\1\2\0\2\1\2\0"+
-    "\4\1\1\0\14\1\1\0\1\1\1\0\7\1\1\0\101\1\1\0"+
-    "\4\1\2\0\10\1\1\0\7\1\1\0\34\1\1\0\4\1\1\0"+
-    "\5\1\1\0\1\1\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0"+
-    "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
-    "\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0\10\1\2\0"+
-    "\62\4\u1600\0\4\1\1\0\33\1\1\0\2\1\1\0\1\1\2\0"+
-    "\1\1\1\0\12\1\1\0\4\1\1\0\1\1\1\0\1\1\6\0"+
-    "\1\1\4\0\1\1\1\0\1\1\1\0\1\1\1\0\3\1\1\0"+
-    "\2\1\1\0\1\1\2\0\1\1\1\0\1\1\1\0\1\1\1\0"+
-    "\1\1\1\0\1\1\1\0\2\1\1\0\1\1\2\0\4\1\1\0"+
-    "\7\1\1\0\4\1\1\0\4\1\1\0\1\1\1\0\12\1\1\0"+
-    "\21\1\5\0\3\1\1\0\5\1\1\0\21\1\u032a\0\32\17\1\13"+
-    "\u0dff\0\ua6d7\12\51\0\u1035\12\13\0\336\12\u3fe2\0\u021e\12\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
-    "\1\3\36\0\140\3\200\0\360\3\uffff\0\uffff\0\ufe12\0";
+    "\42\0\1\32\1\7\3\0\1\31\2\0\1\7\1\0\1\24\1\0"+
+    "\1\25\1\0\12\21\1\23\1\24\5\0\32\15\4\0\1\26\1\0"+
+    "\32\15\56\0\1\4\1\15\2\0\1\5\1\4\6\0\1\15\1\0"+
+    "\1\23\2\0\1\15\5\0\27\15\1\0\37\15\1\0\u01ca\15\4\0"+
+    "\14\15\5\0\1\23\10\0\5\15\7\0\1\15\1\0\1\15\21\0"+
+    "\160\5\5\15\1\0\2\15\2\0\4\15\1\24\1\15\6\0\1\15"+
+    "\1\23\3\15\1\0\1\15\1\0\24\15\1\0\123\15\1\0\213\15"+
+    "\1\0\7\5\246\15\1\0\46\15\2\0\1\15\7\0\47\15\1\0"+
+    "\1\24\7\0\55\5\1\0\1\5\1\0\2\5\1\0\2\5\1\0"+
+    "\1\5\10\0\33\33\5\0\3\33\1\15\1\23\13\0\6\5\6\0"+
+    "\2\24\2\0\13\5\1\0\1\5\3\0\53\15\25\5\12\20\1\0"+
+    "\1\20\1\24\1\0\2\15\1\5\143\15\1\0\1\15\10\5\1\0"+
+    "\6\5\2\15\2\5\1\0\4\5\2\15\12\20\3\15\2\0\1\15"+
+    "\17\0\1\5\1\15\1\5\36\15\33\5\2\0\131\15\13\5\1\15"+
+    "\16\0\12\20\41\15\11\5\2\15\2\0\1\24\1\0\1\15\5\0"+
+    "\26\15\4\5\1\15\11\5\1\15\3\5\1\15\5\5\22\0\31\15"+
+    "\3\5\104\0\25\15\1\0\10\15\26\0\60\5\66\15\3\5\1\15"+
+    "\22\5\1\15\7\5\12\15\2\5\2\0\12\20\1\0\20\15\3\5"+
+    "\1\0\10\15\2\0\2\15\2\0\26\15\1\0\7\15\1\0\1\15"+
+    "\3\0\4\15\2\0\1\5\1\15\7\5\2\0\2\5\2\0\3\5"+
+    "\1\15\10\0\1\5\4\0\2\15\1\0\3\15\2\5\2\0\12\20"+
+    "\2\15\17\0\3\5\1\0\6\15\4\0\2\15\2\0\26\15\1\0"+
+    "\7\15\1\0\2\15\1\0\2\15\1\0\2\15\2\0\1\5\1\0"+
+    "\5\5\4\0\2\5\2\0\3\5\3\0\1\5\7\0\4\15\1\0"+
+    "\1\15\7\0\12\20\2\5\3\15\1\5\13\0\3\5\1\0\11\15"+
+    "\1\0\3\15\1\0\26\15\1\0\7\15\1\0\2\15\1\0\5\15"+
+    "\2\0\1\5\1\15\10\5\1\0\3\5\1\0\3\5\2\0\1\15"+
+    "\17\0\2\15\2\5\2\0\12\20\11\0\1\15\7\0\3\5\1\0"+
+    "\10\15\2\0\2\15\2\0\26\15\1\0\7\15\1\0\2\15\1\0"+
+    "\5\15\2\0\1\5\1\15\7\5\2\0\2\5\2\0\3\5\10\0"+
+    "\2\5\4\0\2\15\1\0\3\15\2\5\2\0\12\20\1\0\1\15"+
+    "\20\0\1\5\1\15\1\0\6\15\3\0\3\15\1\0\4\15\3\0"+
+    "\2\15\1\0\1\15\1\0\2\15\3\0\2\15\3\0\3\15\3\0"+
+    "\14\15\4\0\5\5\3\0\3\5\1\0\4\5\2\0\1\15\6\0"+
+    "\1\5\16\0\12\20\20\0\4\5\1\0\10\15\1\0\3\15\1\0"+
+    "\27\15\1\0\20\15\3\0\1\15\7\5\1\0\3\5\1\0\4\5"+
+    "\7\0\2\5\1\0\3\15\5\0\2\15\2\5\2\0\12\20\20\0"+
+    "\1\15\3\5\1\0\10\15\1\0\3\15\1\0\27\15\1\0\12\15"+
+    "\1\0\5\15\2\0\1\5\1\15\7\5\1\0\3\5\1\0\4\5"+
+    "\7\0\2\5\7\0\1\15\1\0\2\15\2\5\2\0\12\20\1\0"+
+    "\2\15\16\0\3\5\1\0\10\15\1\0\3\15\1\0\51\15\2\0"+
+    "\1\15\7\5\1\0\3\5\1\0\4\5\1\15\5\0\3\15\1\5"+
+    "\7\0\3\15\2\5\2\0\12\20\12\0\6\15\2\0\2\5\1\0"+
+    "\22\15\3\0\30\15\1\0\11\15\1\0\1\15\2\0\7\15\3\0"+
+    "\1\5\4\0\6\5\1\0\1\5\1\0\10\5\6\0\12\20\2\0"+
+    "\2\5\15\0\60\34\1\35\2\34\7\35\5\0\7\34\10\35\1\0"+
+    "\12\20\47\0\2\34\1\0\1\34\2\0\2\34\1\0\1\34\2\0"+
+    "\1\34\6\0\4\34\1\0\7\34\1\0\3\34\1\0\1\34\1\0"+
+    "\1\34\2\0\2\34\1\0\4\34\1\35\2\34\6\35\1\0\2\35"+
+    "\1\34\2\0\5\34\1\0\1\34\1\0\6\35\2\0\12\20\2\0"+
+    "\4\34\40\0\1\15\27\0\2\5\6\0\12\20\13\0\1\5\1\0"+
+    "\1\5\1\0\1\5\4\0\2\5\10\15\1\0\44\15\4\0\24\5"+
+    "\1\0\2\5\5\15\13\5\1\0\44\5\11\0\1\5\71\0\53\34"+
+    "\24\35\1\34\12\20\6\0\6\34\4\35\4\34\3\35\1\34\3\35"+
+    "\2\34\7\35\3\34\4\35\15\34\14\35\1\34\1\35\12\20\4\35"+
+    "\2\34\46\15\1\0\1\15\5\0\1\15\2\0\53\15\1\0\4\15"+
+    "\u0100\17\111\15\1\0\4\15\2\0\7\15\1\0\1\15\1\0\4\15"+
+    "\2\0\51\15\1\0\4\15\2\0\41\15\1\0\4\15\2\0\7\15"+
+    "\1\0\1\15\1\0\4\15\2\0\17\15\1\0\71\15\1\0\4\15"+
+    "\2\0\103\15\2\0\3\5\40\0\20\15\20\0\126\15\2\0\6\15"+
+    "\3\0\u026c\15\2\0\21\15\1\0\32\15\5\0\113\15\3\0\13\15"+
+    "\7\0\15\15\1\0\4\15\3\5\13\0\22\15\3\5\13\0\22\15"+
+    "\2\5\14\0\15\15\1\0\3\15\1\0\2\5\14\0\64\34\40\35"+
+    "\3\0\1\34\4\0\1\34\1\35\2\0\12\20\41\0\4\5\1\0"+
+    "\12\20\6\0\130\15\10\0\5\15\2\5\42\15\1\5\1\15\5\0"+
+    "\106\15\12\0\37\15\1\0\14\5\4\0\14\5\12\0\12\20\36\34"+
+    "\2\0\5\34\13\0\54\34\4\0\32\34\6\0\12\20\1\34\3\0"+
+    "\2\34\40\0\27\15\5\5\4\0\65\34\12\35\1\0\35\35\2\0"+
+    "\1\5\12\20\6\0\12\20\6\0\16\34\2\0\17\5\101\0\5\5"+
+    "\57\15\21\5\7\15\4\0\12\20\21\0\11\5\14\0\3\5\36\15"+
+    "\15\5\2\15\12\20\54\15\16\5\14\0\44\15\24\5\10\0\12\20"+
+    "\3\0\3\15\12\20\44\15\2\0\11\15\107\0\3\5\1\0\25\5"+
+    "\4\15\1\5\4\15\3\5\2\15\1\0\2\5\6\0\300\15\66\5"+
+    "\5\0\5\5\u0116\15\2\0\6\15\2\0\46\15\2\0\6\15\2\0"+
+    "\10\15\1\0\1\15\1\0\1\15\1\0\1\15\1\0\37\15\2\0"+
+    "\65\15\1\0\7\15\1\0\1\15\3\0\3\15\1\0\7\15\3\0"+
+    "\4\15\2\0\6\15\4\0\15\15\5\0\3\15\1\0\7\15\17\0"+
+    "\1\5\1\12\2\5\10\0\2\25\12\0\1\25\2\0\1\23\2\0"+
+    "\5\5\1\26\14\0\1\4\2\0\2\26\3\0\1\24\4\0\1\4"+
+    "\12\0\1\26\13\0\5\5\1\0\12\5\1\0\1\15\15\0\1\15"+
+    "\20\0\15\15\63\0\23\5\1\10\15\5\21\0\1\15\4\0\1\15"+
+    "\2\0\12\15\1\0\1\15\3\0\5\15\4\0\1\4\1\0\1\15"+
+    "\1\0\1\15\1\0\1\15\1\0\4\15\1\0\12\15\1\16\2\0"+
+    "\4\15\5\0\5\15\4\0\1\15\21\0\51\15\13\0\6\4\17\0"+
+    "\2\4\u016f\0\2\4\14\0\1\4\137\0\1\4\106\0\1\4\31\0"+
+    "\13\4\4\0\3\4\273\0\14\15\1\16\47\15\300\0\2\4\12\0"+
+    "\1\4\11\0\1\4\72\0\4\4\1\0\5\4\1\4\1\0\7\4"+
+    "\1\4\2\4\1\4\1\4\1\0\2\4\2\4\1\4\4\4\1\3"+
+    "\2\4\1\4\1\4\2\4\2\4\1\4\3\4\1\4\3\4\2\4"+
+    "\10\4\3\4\5\4\1\4\1\4\1\4\5\4\14\4\13\4\2\4"+
+    "\2\4\1\4\1\4\2\4\1\4\1\4\22\4\1\4\2\4\2\4"+
+    "\6\4\12\0\2\4\6\4\1\4\1\4\1\4\2\4\3\4\2\4"+
+    "\10\4\2\4\4\4\2\4\13\4\2\4\5\4\2\4\2\4\1\4"+
+    "\5\4\2\4\1\4\1\4\1\4\2\4\24\4\2\4\5\4\6\4"+
+    "\1\4\2\4\1\3\1\4\2\4\1\4\4\4\1\4\2\4\1\4"+
+    "\2\0\2\4\4\3\1\4\1\4\2\4\1\4\1\0\1\4\1\0"+
+    "\1\4\6\0\1\4\3\0\1\4\6\0\1\4\12\0\2\4\17\0"+
+    "\1\4\2\0\1\4\4\0\1\4\1\0\1\4\4\0\3\4\1\0"+
+    "\1\4\13\0\2\4\3\4\55\0\3\4\11\0\1\4\16\0\1\4"+
+    "\16\0\1\4\u0174\0\2\4\u01cf\0\3\4\23\0\2\4\63\0\1\4"+
+    "\4\0\1\4\252\0\57\15\1\0\57\15\1\0\205\15\6\0\4\15"+
+    "\3\5\2\15\14\0\46\15\1\0\1\15\5\0\1\15\2\0\70\15"+
+    "\7\0\1\15\17\0\1\5\27\15\11\0\7\15\1\0\7\15\1\0"+
+    "\7\15\1\0\7\15\1\0\7\15\1\0\7\15\1\0\7\15\1\0"+
+    "\7\15\1\0\40\5\57\0\1\15\120\0\32\27\1\0\131\27\14\0"+
+    "\326\27\57\0\1\15\1\0\1\27\31\0\11\27\6\5\1\4\5\22"+
+    "\2\0\3\27\1\15\1\15\1\4\3\0\126\30\2\0\2\5\2\22"+
+    "\3\30\133\22\1\0\4\22\5\0\51\15\3\0\136\17\21\0\33\15"+
+    "\65\0\20\22\227\0\1\4\1\0\1\4\66\0\57\22\1\0\130\22"+
+    "\250\0\u19b6\27\112\0\u51d6\27\52\0\u048d\15\103\0\56\15\2\0\u010d\15"+
+    "\3\0\20\15\12\20\2\15\24\0\57\15\4\5\1\0\12\5\1\0"+
+    "\37\15\2\5\120\15\2\5\45\0\11\15\2\0\147\15\2\0\44\15"+
+    "\1\0\10\15\77\0\13\15\1\5\3\15\1\5\4\15\1\5\27\15"+
+    "\5\5\30\0\64\15\14\0\2\5\62\15\22\5\12\0\12\20\6\0"+
+    "\22\5\6\15\3\0\1\15\1\0\1\15\2\0\12\20\34\15\10\5"+
+    "\2\0\27\15\15\5\14\0\35\17\3\0\4\5\57\15\16\5\16\0"+
+    "\1\15\12\20\6\0\5\34\1\35\12\34\12\20\5\34\1\0\51\15"+
+    "\16\5\11\0\3\15\1\5\10\15\2\5\2\0\12\20\6\0\33\34"+
+    "\3\35\62\34\1\35\1\34\3\35\2\34\2\35\5\34\2\35\1\34"+
+    "\1\35\1\34\30\0\5\34\13\15\5\5\2\0\3\15\2\5\12\0"+
+    "\6\15\2\0\6\15\2\0\6\15\11\0\7\15\1\0\7\15\1\0"+
+    "\53\15\1\0\12\15\12\0\163\15\10\5\1\0\2\5\2\0\12\20"+
+    "\6\0\u2ba4\17\14\0\27\17\4\0\61\17\u2104\0\u016e\27\2\0\152\27"+
+    "\46\0\7\15\14\0\5\15\5\0\1\33\1\5\12\33\1\0\15\33"+
+    "\1\0\5\33\1\0\1\33\1\0\2\33\1\0\2\33\1\0\12\33"+
+    "\142\15\41\0\u016b\15\22\0\100\15\2\0\66\15\50\0\14\15\4\0"+
+    "\16\5\1\6\1\11\1\24\2\0\1\23\1\24\13\0\20\5\3\0"+
+    "\2\26\30\0\3\26\1\24\1\0\1\25\1\0\1\24\1\23\32\0"+
+    "\5\15\1\0\207\15\2\0\1\5\7\0\1\25\4\0\1\24\1\0"+
+    "\1\25\1\0\12\20\1\23\1\24\5\0\32\15\4\0\1\26\1\0"+
+    "\32\15\13\0\70\22\2\5\37\17\3\0\6\17\2\0\6\17\2\0"+
+    "\6\17\2\0\3\17\34\0\3\5\4\0\14\15\1\0\32\15\1\0"+
+    "\23\15\1\0\2\15\1\0\17\15\2\0\16\15\42\0\173\15\105\0"+
+    "\65\15\210\0\1\5\202\0\35\15\3\0\61\15\17\0\1\5\37\0"+
+    "\40\15\20\0\33\15\5\0\46\15\5\5\5\0\36\15\2\0\44\15"+
+    "\4\0\10\15\1\0\5\15\52\0\236\15\2\0\12\20\6\0\44\15"+
+    "\4\0\44\15\4\0\50\15\10\0\64\15\234\0\u0137\15\11\0\26\15"+
+    "\12\0\10\15\230\0\6\15\2\0\1\15\1\0\54\15\1\0\2\15"+
+    "\3\0\1\15\2\0\27\15\12\0\27\15\11\0\37\15\101\0\23\15"+
+    "\1\0\2\15\12\0\26\15\12\0\32\15\106\0\70\15\6\0\2\15"+
+    "\100\0\1\15\3\5\1\0\2\5\5\0\4\5\4\15\1\0\3\15"+
+    "\1\0\33\15\4\0\3\5\4\0\1\5\40\0\35\15\3\0\35\15"+
+    "\43\0\10\15\1\0\34\15\2\5\31\0\66\15\12\0\26\15\12\0"+
+    "\23\15\15\0\22\15\156\0\111\15\67\0\63\15\15\0\63\15\u030d\0"+
+    "\3\5\65\15\17\5\37\0\12\20\17\0\4\5\55\15\13\5\2\0"+
+    "\1\5\22\0\31\15\7\0\12\20\6\0\3\5\44\15\16\5\1\0"+
+    "\12\20\20\0\43\15\1\5\2\0\1\15\11\0\3\5\60\15\16\5"+
+    "\4\15\5\0\3\5\3\0\12\20\1\15\1\0\1\15\43\0\22\15"+
+    "\1\0\31\15\14\5\6\0\1\5\101\0\7\15\1\0\1\15\1\0"+
+    "\4\15\1\0\17\15\1\0\12\15\7\0\57\15\14\5\5\0\12\20"+
+    "\6\0\4\5\1\0\10\15\2\0\2\15\2\0\26\15\1\0\7\15"+
+    "\1\0\2\15\1\0\5\15\2\0\1\5\1\15\7\5\2\0\2\5"+
+    "\2\0\3\5\2\0\1\15\6\0\1\5\5\0\5\15\2\5\2\0"+
+    "\7\5\3\0\5\5\213\0\65\15\22\5\4\15\5\0\12\20\46\0"+
+    "\60\15\24\5\2\15\1\0\1\15\10\0\12\20\246\0\57\15\7\5"+
+    "\2\0\11\5\27\0\4\15\2\5\42\0\60\15\21\5\3\0\1\15"+
+    "\13\0\12\20\46\0\53\15\15\5\10\0\12\20\66\0\32\34\3\0"+
+    "\17\35\4\0\12\20\2\34\3\0\1\34\u0160\0\100\15\12\20\25\0"+
+    "\1\15\u01c0\0\71\15\u0107\0\11\15\1\0\45\15\10\5\1\0\10\5"+
+    "\1\15\17\0\12\20\30\0\36\15\2\0\26\5\1\0\16\5\u0349\0"+
+    "\u039a\15\146\0\157\15\21\0\304\15\u0abc\0\u042f\15\u0fd1\0\u0247\15\u21b9\0"+
+    "\u0239\15\7\0\37\15\1\0\12\20\146\0\36\15\2\0\5\5\13\0"+
+    "\60\15\7\5\11\0\4\15\14\0\12\20\11\0\25\15\5\0\23\15"+
+    "\u0370\0\105\15\13\0\1\15\56\5\20\0\4\5\15\15\100\0\1\15"+
+    "\u401f\0\1\22\1\30\u0bfe\0\153\15\5\0\15\15\3\0\11\15\7\0"+
+    "\12\15\3\0\2\5\1\0\4\5\u14c1\0\5\5\3\0\26\5\2\0"+
+    "\7\5\36\0\4\5\224\0\3\5\u01bb\0\125\15\1\0\107\15\1\0"+
+    "\2\15\2\0\1\15\2\0\2\15\2\0\4\15\1\0\14\15\1\0"+
+    "\1\15\1\0\7\15\1\0\101\15\1\0\4\15\2\0\10\15\1\0"+
+    "\7\15\1\0\34\15\1\0\4\15\1\0\5\15\1\0\1\15\3\0"+
+    "\7\15\1\0\u0154\15\2\0\31\15\1\0\31\15\1\0\37\15\1\0"+
+    "\31\15\1\0\37\15\1\0\31\15\1\0\37\15\1\0\31\15\1\0"+
+    "\37\15\1\0\31\15\1\0\10\15\2\0\62\20\u0200\0\67\5\4\0"+
+    "\62\5\10\0\1\5\16\0\1\5\26\0\5\5\1\0\17\5\u0550\0"+
+    "\7\5\1\0\21\5\2\0\7\5\1\0\2\5\1\0\5\5\u07d5\0"+
+    "\305\15\13\0\7\5\51\0\104\15\7\5\5\0\12\20\u04a6\0\4\15"+
+    "\1\0\33\15\1\0\2\15\1\0\1\15\2\0\1\15\1\0\12\15"+
+    "\1\0\4\15\1\0\1\15\1\0\1\15\6\0\1\15\4\0\1\15"+
+    "\1\0\1\15\1\0\1\15\1\0\3\15\1\0\2\15\1\0\1\15"+
+    "\2\0\1\15\1\0\1\15\1\0\1\15\1\0\1\15\1\0\1\15"+
+    "\1\0\2\15\1\0\1\15\2\0\4\15\1\0\7\15\1\0\4\15"+
+    "\1\0\4\15\1\0\1\15\1\0\12\15\1\0\21\15\5\0\3\15"+
+    "\1\0\5\15\1\0\21\15\u0144\0\4\4\1\4\312\4\1\4\60\4"+
+    "\15\0\3\4\37\0\1\4\32\15\6\0\32\15\2\0\4\4\2\16"+
+    "\14\15\2\16\12\15\4\0\1\4\2\0\12\4\22\0\71\4\32\1"+
+    "\1\30\2\4\15\4\12\0\1\4\24\0\1\4\2\0\11\4\1\0"+
+    "\4\4\11\0\7\4\2\4\256\4\42\4\2\4\141\4\1\3\16\4"+
+    "\2\4\2\4\1\4\3\4\2\4\44\4\3\3\2\4\1\3\2\4"+
+    "\3\3\44\4\2\4\3\4\1\4\4\4\5\2\102\4\2\3\2\4"+
+    "\13\3\25\4\4\3\4\4\1\3\1\4\11\3\3\4\1\3\4\4"+
+    "\3\3\1\4\3\3\42\4\1\3\123\4\1\4\77\4\10\0\3\4"+
+    "\6\4\1\4\30\4\7\4\2\4\2\4\1\4\2\3\4\4\1\3"+
+    "\14\4\1\4\2\4\4\4\2\4\1\3\4\4\2\3\15\4\2\4"+
+    "\2\4\1\4\10\4\2\4\11\4\1\4\5\4\3\4\14\4\3\4"+
+    "\10\4\3\4\2\4\1\4\1\4\1\4\4\4\1\4\6\4\1\4"+
+    "\3\4\1\4\6\4\113\4\3\3\3\4\5\3\60\0\43\4\1\3"+
+    "\20\4\3\3\11\4\1\3\5\4\5\4\1\4\1\3\6\4\15\4"+
+    "\6\4\3\4\1\4\1\4\2\4\3\4\1\4\2\4\7\4\6\4"+
+    "\164\0\14\4\125\0\53\4\14\0\4\4\70\0\10\4\12\0\6\4"+
+    "\50\0\10\4\36\0\122\4\14\0\4\4\10\4\5\3\1\4\2\3"+
+    "\6\4\1\3\11\4\12\3\1\4\1\0\1\4\2\3\1\4\6\4"+
+    "\1\0\52\4\2\4\4\4\3\4\1\4\1\4\47\4\15\4\5\4"+
+    "\2\3\1\4\2\3\6\4\3\4\15\4\1\4\15\3\42\4\u05fe\4"+
+    "\2\0\ua6d7\27\51\0\u1035\27\13\0\336\27\2\0\u1682\27\u295e\0\u021e\27"+
+    "\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
+    "\1\5\36\0\137\13\1\14\200\0\360\5\uffff\0\uffff\0\ufe12\0";
 
   /** 
    * Translates characters to character classes
@@ -218,12 +284,15 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
-    "\1\7\1\2\1\1\1\10\1\2\1\0\1\2\1\0"+
-    "\1\4\1\0\2\2\2\0\1\1\1\0";
+    "\1\0\2\1\3\2\2\1\1\3\1\2\1\4\2\5"+
+    "\1\6\1\1\1\7\1\10\1\3\1\11\1\2\1\0"+
+    "\4\2\1\0\1\2\2\0\1\3\1\0\1\3\2\2"+
+    "\1\0\1\5\1\2\1\5\1\0\2\3\1\0\2\2"+
+    "\2\0\1\2\1\0\2\3\5\2\1\0\1\2\1\3"+
+    "\3\2";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -248,12 +317,17 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\22\0\44\0\66\0\110\0\132\0\154\0\176"+
-    "\0\220\0\242\0\264\0\306\0\330\0\352\0\374\0\u010e"+
-    "\0\u0120\0\154\0\u0132\0\u0144\0\u0156\0\264\0\u0168\0\u017a";
+    "\0\0\0\36\0\74\0\132\0\170\0\226\0\264\0\322"+
+    "\0\360\0\u010e\0\u012c\0\u014a\0\u0168\0\u0186\0\u01a4\0\u01c2"+
+    "\0\u01e0\0\u01fe\0\u021c\0\u023a\0\74\0\u0258\0\u0276\0\u0294"+
+    "\0\u02b2\0\264\0\u02d0\0\u02ee\0\322\0\u030c\0\u032a\0\u0348"+
+    "\0\u0366\0\u0384\0\u03a2\0\u03c0\0\u03de\0\u03fc\0\u01a4\0\u041a"+
+    "\0\u0438\0\u0456\0\u0474\0\u0492\0\u04b0\0\u04ce\0\u04ec\0\u050a"+
+    "\0\u0528\0\u0546\0\u0564\0\u0582\0\u05a0\0\u05be\0\u05dc\0\u05fa"+
+    "\0\36\0\u0618\0\360\0\u0636\0\u0654";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -276,33 +350,94 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_TRANS = zzUnpackTrans();
 
   private static final String ZZ_TRANS_PACKED_0 =
-    "\1\2\1\3\1\4\1\2\1\5\1\6\3\2\1\7"+
-    "\1\10\1\11\2\2\1\12\1\13\2\14\23\0\3\3"+
-    "\1\15\1\0\1\16\1\0\1\16\1\17\2\0\1\16"+
-    "\1\0\1\12\2\0\1\3\1\0\1\3\2\4\1\15"+
-    "\1\0\1\16\1\0\1\16\1\17\2\0\1\16\1\0"+
-    "\1\12\2\0\1\4\1\0\2\3\2\5\2\0\2\20"+
-    "\1\21\2\0\1\20\1\0\1\12\2\0\1\5\3\0"+
-    "\1\6\1\0\1\6\3\0\1\17\7\0\1\6\1\0"+
-    "\2\3\1\22\1\5\1\23\3\0\1\22\4\0\1\12"+
-    "\2\0\1\22\3\0\1\10\15\0\1\10\3\0\1\11"+
-    "\15\0\1\11\1\0\2\3\1\12\1\15\1\0\1\16"+
-    "\1\0\1\16\1\17\2\0\1\24\1\25\1\12\2\0"+
-    "\1\12\3\0\1\26\13\0\1\27\1\0\1\26\3\0"+
-    "\1\14\14\0\2\14\1\0\2\3\2\15\2\0\2\30"+
-    "\1\17\2\0\1\30\1\0\1\12\2\0\1\15\1\0"+
-    "\2\3\1\16\12\0\1\3\2\0\1\16\1\0\2\3"+
-    "\1\17\1\15\1\23\3\0\1\17\4\0\1\12\2\0"+
-    "\1\17\3\0\1\20\1\5\14\0\1\20\1\0\2\3"+
-    "\1\21\1\5\1\23\3\0\1\21\4\0\1\12\2\0"+
-    "\1\21\3\0\1\23\1\0\1\23\3\0\1\17\7\0"+
-    "\1\23\1\0\2\3\1\24\1\15\4\0\1\17\4\0"+
-    "\1\12\2\0\1\24\3\0\1\25\12\0\1\24\2\0"+
-    "\1\25\3\0\1\27\13\0\1\27\1\0\1\27\3\0"+
-    "\1\30\1\15\14\0\1\30";
+    "\1\2\1\3\1\4\1\5\1\6\2\2\1\7\2\2"+
+    "\1\10\2\2\1\11\1\12\1\13\1\14\1\15\1\16"+
+    "\3\2\1\17\1\20\1\21\2\2\1\22\2\23\37\0"+
+    "\1\24\3\0\2\25\1\0\5\25\20\0\1\25\5\0"+
+    "\1\4\2\0\1\4\1\0\1\26\2\4\20\0\1\4"+
+    "\2\0\1\4\2\0\1\5\2\0\1\5\1\27\1\30"+
+    "\2\5\20\0\1\5\5\0\1\6\2\0\1\6\1\27"+
+    "\1\31\2\6\20\0\1\6\5\0\1\32\2\0\1\33"+
+    "\1\34\3\32\20\0\1\32\3\0\1\5\1\6\5\0"+
+    "\1\35\3\0\1\6\24\0\2\11\1\0\10\11\2\36"+
+    "\1\0\1\37\1\0\1\37\1\40\2\0\1\37\1\0"+
+    "\1\22\1\0\1\11\5\0\1\12\1\11\1\0\1\12"+
+    "\1\41\1\42\2\12\3\11\2\36\1\0\1\37\1\0"+
+    "\1\37\1\40\2\0\1\37\1\0\1\22\1\0\1\12"+
+    "\5\0\2\13\1\0\5\13\2\11\1\13\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\13\5\0\2\14\1\0\5\14\3\11\2\14"+
+    "\2\0\2\43\1\44\2\0\1\43\1\0\1\22\1\0"+
+    "\1\14\5\0\1\15\1\14\1\0\1\45\1\46\3\15"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\15\5\0\2\16\1\0\5\16\5\0"+
+    "\1\16\3\0\1\40\6\0\1\16\5\0\2\47\1\0"+
+    "\5\47\3\11\2\14\1\50\3\0\1\47\4\0\1\22"+
+    "\1\0\1\47\5\0\2\20\1\0\5\20\20\0\1\20"+
+    "\5\0\2\21\1\0\5\21\20\0\1\21\5\0\2\22"+
+    "\1\0\5\22\3\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\51\1\52\1\22\1\0\1\22\5\0"+
+    "\2\23\1\0\5\23\17\0\2\23\5\0\2\24\1\0"+
+    "\5\24\20\0\1\24\2\0\1\4\1\53\1\54\1\4"+
+    "\2\0\1\4\1\0\1\26\2\4\1\0\1\54\16\0"+
+    "\1\4\12\0\1\55\1\56\24\0\1\4\1\53\1\54"+
+    "\1\5\2\0\1\5\1\27\1\30\2\5\1\0\1\54"+
+    "\16\0\1\5\2\0\1\4\1\53\1\54\1\6\2\0"+
+    "\1\6\1\27\1\31\2\6\1\0\1\54\16\0\1\6"+
+    "\5\0\1\33\2\0\1\33\1\34\3\33\20\0\1\33"+
+    "\10\0\1\57\32\0\2\36\1\0\5\36\3\11\2\36"+
+    "\2\0\2\60\1\40\2\0\1\60\1\0\1\22\1\0"+
+    "\1\36\5\0\2\37\1\0\5\37\3\11\13\0\1\11"+
+    "\1\0\1\37\5\0\2\40\1\0\5\40\3\11\2\36"+
+    "\1\50\3\0\1\40\4\0\1\22\1\0\1\40\5\0"+
+    "\2\11\1\0\2\11\1\61\1\62\4\11\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\11\2\0\1\4\1\53\1\54\1\12\1\11"+
+    "\1\0\1\12\1\41\1\42\2\12\1\11\1\63\1\11"+
+    "\2\36\1\0\1\37\1\0\1\37\1\40\2\0\1\37"+
+    "\1\0\1\22\1\0\1\12\5\0\2\43\1\0\5\43"+
+    "\3\0\2\14\13\0\1\43\5\0\2\44\1\0\5\44"+
+    "\3\11\2\14\1\50\3\0\1\44\4\0\1\22\1\0"+
+    "\1\44\5\0\1\45\1\14\1\0\1\45\1\46\3\45"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\45\5\0\2\14\1\0\1\64\4\14"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\14\5\0\2\50\1\0\5\50\5\0"+
+    "\1\50\3\0\1\40\6\0\1\50\5\0\2\51\1\0"+
+    "\5\51\3\11\2\36\4\0\1\40\4\0\1\22\1\0"+
+    "\1\51\5\0\2\52\1\0\5\52\16\0\1\51\1\0"+
+    "\1\52\2\0\1\4\2\0\1\53\2\0\1\53\1\65"+
+    "\1\66\2\53\20\0\1\53\5\0\1\54\2\0\1\54"+
+    "\1\65\1\67\2\54\20\0\1\54\2\0\1\4\1\53"+
+    "\1\54\5\0\1\70\3\0\1\54\32\0\1\56\1\71"+
+    "\26\0\1\57\2\0\1\57\1\0\3\57\20\0\1\57"+
+    "\5\0\2\60\1\0\5\60\3\0\2\36\13\0\1\60"+
+    "\2\0\1\4\1\53\1\54\2\11\1\0\2\11\1\72"+
+    "\3\11\1\63\1\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\11\5\0"+
+    "\2\11\1\0\3\11\1\62\1\73\3\11\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\11\5\0\1\63\1\11\1\0\1\63\1\74"+
+    "\1\75\2\63\3\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\63\5\0"+
+    "\1\64\1\14\1\0\1\64\1\14\3\64\3\11\2\14"+
+    "\2\0\2\43\1\44\2\0\1\43\1\0\1\22\1\0"+
+    "\1\64\12\0\1\55\25\0\1\4\1\53\1\54\1\53"+
+    "\2\0\1\53\1\65\1\66\2\53\1\0\1\54\16\0"+
+    "\1\53\2\0\1\4\1\53\2\54\2\0\1\54\1\65"+
+    "\1\67\2\54\1\0\1\54\16\0\1\54\3\0\1\53"+
+    "\1\54\5\0\1\70\3\0\1\54\22\0\1\53\1\54"+
+    "\2\11\1\0\2\11\1\72\3\11\1\63\1\11\2\36"+
+    "\1\0\1\37\1\0\1\37\1\40\2\0\1\37\1\0"+
+    "\1\22\1\0\1\11\5\0\2\11\1\0\2\11\1\61"+
+    "\5\11\2\36\1\0\1\37\1\0\1\37\1\40\2\0"+
+    "\1\37\1\0\1\22\1\0\1\11\2\0\1\4\1\53"+
+    "\1\54\1\63\1\11\1\0\1\63\1\74\1\75\2\63"+
+    "\1\11\1\63\1\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\63";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[396];
+    int [] result = new int[1650];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -329,7 +464,7 @@ public final class StandardTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -340,11 +475,12 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\1\0\1\11\13\1\1\0\1\1\1\0\1\1\1\0"+
-    "\2\1\2\0\1\1\1\0";
+    "\1\0\1\11\22\1\1\0\4\1\1\0\1\1\2\0"+
+    "\1\1\1\0\3\1\1\0\3\1\1\0\2\1\1\0"+
+    "\2\1\2\0\1\1\1\0\7\1\1\0\1\11\4\1";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;
@@ -401,11 +537,11 @@ public final class StandardTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -447,6 +583,9 @@ public final class StandardTokenizerImpl {
 
   /** Hangul token type */
   public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+  
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = StandardTokenizer.EMOJI;
 
   /** Character count processed so far */
   public final int yychar()
@@ -492,7 +631,7 @@ public final class StandardTokenizerImpl {
     char [] map = new char[0x110000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2836) {
+    while (i < 4122) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -500,6 +639,8 @@ public final class StandardTokenizerImpl {
     return map;
   }
 
+/* -------------------------------------------------------------------------------- */
+/* Begin Lucene-specific disable-buffer-expansion modifications to skeleton.default */
 
   /**
    * Refills the input buffer.
@@ -527,32 +668,45 @@ public final class StandardTokenizerImpl {
 
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
+    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
+    if (requested == 0) {
+      return true;
     }
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
-        if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+        if (numRead == requested) { // We might have requested too few chars to encode a full Unicode character.
           --zzEndRead;
           zzFinalHighSurrogate = 1;
-          if (totalRead == 1) { return true; }
+          if (numRead == 1) {
+            return true;
+          }
+        } else {                    // There is room in the buffer for at least one more char
+          int c = zzReader.read();  // Expecting to read a low surrogate char
+          if (c == -1) {
+            return true;
+          } else {
+            zzBuffer[zzEndRead++] = (char)c;
+            return false;
+          }
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
+/* End Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+/* ------------------------------------------------------------------------------ */
     
   /**
    * Closes the input stream.
@@ -773,49 +927,62 @@ public final class StandardTokenizerImpl {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
-          }
-        case 9: break;
-        case 2: 
-          { return WORD_TYPE;
-          }
-        case 10: break;
-        case 3: 
-          { return HANGUL_TYPE;
-          }
-        case 11: break;
-        case 4: 
-          { return NUMERIC_TYPE;
-          }
-        case 12: break;
-        case 5: 
-          { return KATAKANA_TYPE;
-          }
-        case 13: break;
-        case 6: 
-          { return IDEOGRAPHIC_TYPE;
-          }
-        case 14: break;
-        case 7: 
-          { return HIRAGANA_TYPE;
-          }
-        case 15: break;
-        case 8: 
-          { return SOUTH_EAST_ASIAN_TYPE;
-          }
-        case 16: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
               {
                 return YYEOF;
               }
-          } 
-          else {
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */
+            } 
+            // fall through
+          case 10: break;
+          case 2: 
+            { return EMOJI_TYPE;
+            } 
+            // fall through
+          case 11: break;
+          case 3: 
+            { return WORD_TYPE;
+            } 
+            // fall through
+          case 12: break;
+          case 4: 
+            { return HANGUL_TYPE;
+            } 
+            // fall through
+          case 13: break;
+          case 5: 
+            { return NUMERIC_TYPE;
+            } 
+            // fall through
+          case 14: break;
+          case 6: 
+            { return KATAKANA_TYPE;
+            } 
+            // fall through
+          case 15: break;
+          case 7: 
+            { return IDEOGRAPHIC_TYPE;
+            } 
+            // fall through
+          case 16: break;
+          case 8: 
+            { return HIRAGANA_TYPE;
+            } 
+            // fall through
+          case 17: break;
+          case 9: 
+            { return SOUTH_EAST_ASIAN_TYPE;
+            } 
+            // fall through
+          case 18: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
index a1e7b17..e95a9b4 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
@@ -34,12 +34,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %integer
 %final
 %public
@@ -48,22 +49,67 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %char
 %buffer 255
 
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx   = [\p{WB:HebrewLetter}\p{WB:ALetter}]                       [\p{WB:Format}\p{WB:Extend}]*
-NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx          = \p{WB:Katakana}                                           [\p{WB:Format}\p{WB:Extend}]* 
-MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      [\p{WB:Format}\p{WB:Extend}]* 
-MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       [\p{WB:Format}\p{WB:Extend}]*
-HanEx               = \p{Script:Han}                                            [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx          = \p{Script:Hiragana}                                       [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx       = \p{WB:Single_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx       = \p{WB:Double_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator}                                  [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx    = \p{LB:Complex_Context}                                    [\p{WB:Format}\p{WB:Extend}]*
+
+//////////////////////////////////////////////////////////////////////////
+// Begin Emoji Macros - see documentation below, near the EMOJI_TYPE rule
+
+// TODO: Remove this include file when JFlex supports these properties directly (in Unicode 11.0+)
+%include ../../../../../../data/jflex/UnicodeEmojiProperties.jflex
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+//   \uFE0E (Text Presentation Selector) and \uFE0F (Emoji Presentation Selector) - included in \p{WB:Extend}
+//   - are explicitly excluded here so that we can properly handle Emoji sequences.
+//
+ExtFmtZwjSansPresSel = [[\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]--[\uFE0E\uFE0F]]*
+
+KeyCapBaseChar = [0-9#*]
+KeyCapBaseCharEx = {KeyCapBaseChar} {ExtFmtZwjSansPresSel}
+KeyCap = \u20E3
+KeyCapEx = {KeyCap} {ExtFmtZwjSansPresSel}
+
+// # \u3030 = WAVY DASH; \u303D = PART ALTERNATION MARK
+AccidentalEmoji = [©®™\u3030\u303D]
+EmojiRKAM = ( \p{WB:Regional_Indicator} | {KeyCapBaseChar} | {AccidentalEmoji} | {Emoji_Modifier} )
+
+// Unlike Unicode properties, macros are not allowed in character classes, so we achieve set difference
+// by applying DeMorgan: the expression that matches everything of 'a' not matched by 'b' is: !(!a|b)
+// TODO: Convert this expression to character class difference when JFlex supports the properties directly (in Unicode 11.0+)
+EmojiSansRKAM = !( ! {Emoji} | {EmojiRKAM} )
+
+EmojiChar = ( {Extended_Pictographic} | {EmojiSansRKAM} )
+
+EmojiCharEx         = {EmojiChar}           {ExtFmtZwjSansPresSel}
+EmojiModifierBaseEx = {Emoji_Modifier_Base} {ExtFmtZwjSansPresSel}
+EmojiModifierEx     = {Emoji_Modifier}      {ExtFmtZwjSansPresSel}
+
+EmojiPresentationSelector = \uFE0F
+EmojiCharOrPresSeqOrModSeq = ( \p{WB:ZWJ}* {EmojiCharEx} {EmojiPresentationSelector}? ) | ( ( \p{WB:ZWJ}* {EmojiModifierBaseEx} )? {EmojiModifierEx} )
+TagSpec = [\u{E0020}-\u{E007E}]
+TagTerm = \u{E007F}
+
+// End Emoji Macros
+//////////////////////////////////////////////////////////////////////////
+
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] {ExtFmtZwj}
+AHLetterEx          = [\p{WB:ALetter}\p{WB:Hebrew_Letter}]                      {ExtFmtZwj}
+NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        {ExtFmtZwj}
+KatakanaEx          = \p{WB:Katakana}                                           {ExtFmtZwj} 
+MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      {ExtFmtZwj} 
+MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         {ExtFmtZwj}
+ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       {ExtFmtZwj}
+HanEx               = \p{Script:Han}                                            {ExtFmtZwj}
+HiraganaEx          = \p{Script:Hiragana}                                       {ExtFmtZwj}
+SingleQuoteEx       = \p{WB:Single_Quote}                                       {ExtFmtZwj}
+DoubleQuoteEx       = \p{WB:Double_Quote}                                       {ExtFmtZwj}
+HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      {ExtFmtZwj}
+RegionalIndicatorEx = \p{WB:Regional_Indicator}                                 {ExtFmtZwj}
+ComplexContextEx    = \p{LB:Complex_Context}                                    {ExtFmtZwj}
 
 %{
   /** Alphanumeric sequences */
@@ -93,6 +139,9 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
   /** Hangul token type */
   public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+  
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = StandardTokenizer.EMOJI;
 
   /** Character count processed so far */
   public final int yychar()
@@ -120,18 +169,64 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
 %%
 
-// UAX#29 WB1.   sot   ÷
-//        WB2.     ÷   eot
+// UAX#29 WB1.    sot ÷ Any
+//        WB2.    Any ÷ eot
 //
 <<EOF>> { return YYEOF; }
 
-// UAX#29 WB8.   Numeric × Numeric
-//        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-//        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
+// Instead of these: UAX#29 WB3c. ZWJ × (Glue_After_Zwj | EBG)
+//                          WB14. (E_Base | EBG) × E_Modifier
+//                          WB15. ^ (RI RI)* RI × RI
+//                          WB16. [^RI] (RI RI)* RI × RI
+//
+// We use the "emoji_sequence" rule from http://www.unicode.org/reports/tr51/tr51-14.html (Unicode 11.0)
+// and the Emoji data from http://unicode.org/Public/emoji/11.0/emoji-data.txt (in included file UnicodeEmojiProperties.jflex)
+// 
+// emoji_sequence :=
+//    Top-level EBNF           Expanded #1                       Expanded #2                       Expanded #3
+//    ---------------------    ----------------------------      -----------------------------     ----------------------------------------------
+//      emoji_core_sequence      emoji_combining_sequence          emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_keycap_sequence           | [0-9#*] \u{FE0F 20E3}      [1]
+//                             | emoji_modifier_sequence                                           | \p{Emoji_Modifier_Base} \p{Emoji_Modifier}
+//                             | emoji_flag_sequence                                               | \p{WB:Regional_Indicator}{2}               )
+//
+//    | emoji_zwj_sequence       emoji_zwj_element                 emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+//                             ( ZWJ emoji_zwj_element )+                                          ( \p{WB:ZWJ} ^^ )+
+// 
+//    | emoji_tag_sequence     tag_base                            emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+//                             tag_spec                                                            [\u{E0020}-\u{E007E}]+
+//                             tag_term                                                            \u{E007F}
+//
+// [1] https://unicode.org/Public/emoji/11.0/emoji-test.txt includes key cap sequences 
+//     WITHOUT \uFE0F (emoji presentation indicator), annotating them as "non-fully-qualified";
+//     TR#51 says about non-fully-qualified *ZWJ sequences* that implementations may
+//     choose whether to support them for segmentation.  This implementation will
+//     recognize /[0-9#*]\u20E3/ - i.e. without \uFE0F - as Emoji. 
+//
+// See also: http://www.unicode.org/L2/L2016/16315-handling-seg-emoji.pdf
+//           https://docs.google.com/document/d/1yDZ5TUZNVVKaM9zYCCLbRIAKGNZANsAGl0bcNzGGvn8
+//
+//     In particular, the above docs recommend a modified UAX#29 WB3c rule (covered by TR#51's "emoji_zwj_sequence"):
+//
+//         WB3c′ ZWJ × (Extended_Pictographic | EmojiNRK)
+//
+  {EmojiCharOrPresSeqOrModSeq} ( ( \p{WB:ZWJ} {EmojiCharOrPresSeqOrModSeq} )* | {TagSpec}+ {TagTerm} ) 
+| {KeyCapBaseCharEx} {EmojiPresentationSelector}? {KeyCapEx} 
+| {RegionalIndicatorEx}{2} 
+  { return EMOJI_TYPE; }
+
+// UAX#29 WB8.    Numeric × Numeric
+//        WB11.   Numeric (MidNum | MidNumLetQ) × Numeric
+//        WB12.   Numeric × (MidNum | MidNumLetQ) Numeric
+//        WB13a.  (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b.  ExtendNumLet × (AHLetter | Numeric | Katakana)
 //
-{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}* 
+{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
   { return NUMERIC_TYPE; }
 
 // subset of the below for typing purposes only!
@@ -141,28 +236,28 @@ ComplexContextEx    = \p{LB:Complex_Context}
 {KatakanaEx}+
   { return KATAKANA_TYPE; }
 
-// UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-//        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-//        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-//        WB7a.  Hebrew_Letter × Single_Quote
-//        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
-//        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
-//        WB9.   (ALetter | Hebrew_Letter) × Numeric
-//        WB10.  Numeric × (ALetter | Hebrew_Letter)
-//        WB13.  Katakana × Katakana
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
-//
-{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+// UAX#29 WB5.    AHLetter × AHLetter
+//        WB6.    AHLetter × (MidLetter | MidNumLetQ) AHLetter
+//        WB7.    AHLetter (MidLetter | MidNumLetQ) × AHLetter
+//        WB7a.   Hebrew_Letter × Single_Quote
+//        WB7b.   Hebrew_Letter × Double_Quote Hebrew_Letter
+//        WB7c.   Hebrew_Letter Double_Quote × Hebrew_Letter
+//        WB9.    AHLetter × Numeric
+//        WB10.   Numeric × AHLetter
+//        WB13.   Katakana × Katakana
+//        WB13a.  (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b.  ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
+//
+{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                     | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                      )+
                    )
-({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                     | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                      )+
                    )
 )*
@@ -172,13 +267,13 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
 // From UAX #29:
 //
-//    [C]haracters with the Line_Break property values of Contingent_Break (CB), 
-//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word 
+//    [C]haracters with the Line_Break property values of Contingent_Break (CB),
+//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
 //    boundary property values based on criteria outside of the scope of this
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 9.0, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -191,17 +286,14 @@ ComplexContextEx    = \p{LB:Complex_Context}
 //
 {ComplexContextEx}+ { return SOUTH_EAST_ASIAN_TYPE; }
 
-// UAX#29 WB14.  Any ÷ Any
+// UAX#29 WB999.  Any ÷ Any
 //
 {HanEx} { return IDEOGRAPHIC_TYPE; }
 {HiraganaEx} { return HIRAGANA_TYPE; }
 
-
-// UAX#29 WB3.   CR × LF
-//        WB3a.  (Newline | CR | LF) ÷
-//        WB3b.  ÷ (Newline | CR | LF)
-//        WB13c. Regional_Indicator × Regional_Indicator
-//        WB14.  Any ÷ Any
+// UAX#29 WB3.    CR × LF
+//        WB3a.   (Newline | CR | LF) ÷
+//        WB3b.   ÷ (Newline | CR | LF)
+//        WB999.  Any ÷ Any
 //
-{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
-  { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */ }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
index 6abbc2b..615b565 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
@@ -18,8 +18,11 @@ package org.apache.lucene.analysis.standard;
 
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -27,6 +30,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TestUtil;
 
@@ -282,7 +286,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   }
   
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
+    WordBreakTestUnicode_9_0_0 wordBreakTest = new WordBreakTestUnicode_9_0_0();
     wordBreakTest.test(a);
   }
   
@@ -358,8 +362,80 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1.,2", new String[] { "3_1", "2" });
   }
 
-
-
+  /** simple emoji */
+  public void testEmoji() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩 💩💩",
+        new String[] { "💩", "💩", "💩" },
+        new String[] { "<EMOJI>", "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence */
+  public void testEmojiSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👩‍❤️‍👩",
+        new String[] { "👩‍❤️‍👩" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence with fitzpatrick modifier */
+  public void testEmojiSequenceWithModifier() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👨🏼‍⚕️",
+        new String[] { "👨🏼‍⚕️" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** regional indicator */
+  public void testEmojiRegionalIndicator() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🇺🇸🇺🇸",
+        new String[] { "🇺🇸", "🇺🇸" },
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** variation sequence */
+  public void testEmojiVariationSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#️⃣",
+        new String[] { "#️⃣" },
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3️⃣",
+        new String[] { "3️⃣",},
+        new String[] { "<EMOJI>" });
+
+    // text presentation sequences
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",
+        new String[] { },
+        new String[] { });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
+        new String[] { "3\uFE0E",},
+        new String[] { "<NUM>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
+        new String[] { "\u2B55",},
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
+        new String[] { "\u2B55", "\u200D\u2B55"},
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  public void testEmojiTagSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "๐Ÿด๓ ง๓ ข๓ ฅ๓ ฎ๓ ง๓ ฟ",
+        new String[] { "๐Ÿด๓ ง๓ ข๓ ฅ๓ ฎ๓ ง๓ ฟ" },
+        new String[] { "<EMOJI>" });
+  }
+
+  public void testEmojiTokenization() throws Exception {
+    // simple emoji around latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "poo💩poo",
+        new String[] { "poo", "💩", "poo" },
+        new String[] { "<ALPHANUM>", "<EMOJI>", "<ALPHANUM>" });
+    // simple emoji around non-latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩中國💩",
+        new String[] { "💩", "中", "國", "💩" },
+        new String[] { "<EMOJI>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<EMOJI>" });
+  }
+  
+  public void testUnicodeEmojiTests() throws Exception {
+    EmojiTokenizationTestUnicode_11_0 emojiTest = new EmojiTokenizationTestUnicode_11_0();
+    emojiTest.test(a);
+  }
+  
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     Analyzer analyzer = new StandardAnalyzer();
@@ -416,4 +492,53 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "ab cd toolong xy z", new String[]{"ab", "cd", "toolo", "ng", "xy", "z"});
     a.close();
   }
+
+  public void testSplitSurrogatePairWithSpoonFeedReader() throws Exception {
+    String text = "12345678\ud800\udf00"; // U+D800 U+DF00 = U+10300 = 𐌀 (OLD ITALIC LETTER A)
+    
+    // Collect tokens with normal reader
+    StandardAnalyzer a = new StandardAnalyzer();
+    TokenStream ts = a.tokenStream("dummy", text);
+    List<String> tokens = new ArrayList<>();
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+    ts.reset();
+    while (ts.incrementToken()) {
+      tokens.add(termAtt.toString());
+    }
+    ts.end();
+    ts.close();
+
+    // Tokens from a spoon-feed reader should be the same as from a normal reader
+    // The 9th char is a high surrogate, so the 9-max-chars spoon-feed reader will split the surrogate pair at a read boundary
+    Reader reader = new SpoonFeedMaxCharsReaderWrapper(9, new StringReader(text));
+    ts = a.tokenStream("dummy", reader);
+    termAtt = ts.addAttribute(CharTermAttribute.class);
+    ts.reset();
+    for (int tokenNum = 0 ; ts.incrementToken() ; ++tokenNum) {
+      assertEquals("token #" + tokenNum + " mismatch: ", termAtt.toString(), tokens.get(tokenNum));
+    }
+    ts.end();
+    ts.close();
+  }
+}
+
+class SpoonFeedMaxCharsReaderWrapper extends Reader {
+  private final Reader in;
+  private final int maxChars; 
+
+  public SpoonFeedMaxCharsReaderWrapper(int maxChars, Reader in) {
+    this.in = in;
+    this.maxChars = maxChars;
+  }
+
+  @Override
+  public void close() throws IOException {
+    in.close();
+  }
+
+  /** Returns the configured number of chars if available */
+  @Override
+  public int read(char[] cbuf, int off, int len) throws IOException {
+    return in.read(cbuf, off, Math.min(maxChars, len));
+  }
 }


[23/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
index a797082..3d964d9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -39,6 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
@@ -69,156 +70,221 @@ public final class UAX29URLEmailTokenizerImpl {
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\1\112\10\110\2\112\2\110\1\112\23\110\1\113\1\17\1\103\1\113"+
-    "\1\75\1\73\1\16\2\76\2\113\1\77\1\57\1\24\1\102\1\65"+
-    "\1\60\1\63\1\62\1\61\1\71\1\70\1\66\1\67\1\64\1\72"+
-    "\1\106\1\110\1\107\1\110\1\101\1\100\1\25\1\30\1\37\1\42"+
-    "\1\36\1\50\1\41\1\32\1\35\1\56\1\51\1\40\1\44\1\46"+
-    "\1\33\1\27\1\54\1\26\1\47\1\31\1\43\1\34\1\55\1\53"+
-    "\1\45\1\52\1\104\1\111\1\105\1\114\1\74\1\114\1\25\1\30"+
-    "\1\37\1\42\1\36\1\50\1\41\1\32\1\35\1\56\1\51\1\40"+
-    "\1\44\1\46\1\33\1\27\1\54\1\26\1\47\1\31\1\43\1\34"+
-    "\1\55\1\53\1\45\1\52\3\114\1\73\1\115\52\0\1\14\2\0"+
-    "\1\3\7\0\1\14\1\0\1\7\2\0\1\14\5\0\27\14\1\0"+
-    "\37\14\1\0\u01ca\14\4\0\14\14\5\0\1\7\10\0\5\14\7\0"+
-    "\1\14\1\0\1\14\21\0\160\117\5\14\1\0\2\14\2\0\4\14"+
-    "\1\10\7\0\1\14\1\7\3\14\1\0\1\14\1\0\24\14\1\0"+
-    "\123\14\1\0\213\14\1\0\7\117\236\14\11\0\46\14\2\0\1\14"+
-    "\7\0\47\14\1\0\1\10\7\0\55\117\1\0\1\117\1\0\2\117"+
-    "\1\0\2\117\1\0\1\117\10\0\33\20\5\0\3\20\1\1\1\7"+
-    "\13\0\5\3\7\0\2\10\2\0\13\117\1\0\1\3\3\0\53\14"+
-    "\25\117\12\4\1\0\1\5\1\10\1\0\2\14\1\117\143\14\1\0"+
-    "\1\14\7\117\1\3\1\0\6\117\2\14\2\117\1\0\4\117\2\14"+
-    "\12\4\3\14\2\0\1\14\17\0\1\3\1\14\1\117\36\14\33\117"+
-    "\2\0\131\14\13\117\1\14\16\0\12\4\41\14\11\117\2\14\2\0"+
-    "\1\10\1\0\1\14\5\0\26\14\4\117\1\14\11\117\1\14\3\117"+
-    "\1\14\5\117\22\0\31\14\3\117\104\0\1\14\1\0\13\14\67\0"+
-    "\33\117\1\0\4\117\66\14\3\117\1\14\22\117\1\14\7\117\12\14"+
-    "\2\117\2\0\12\4\1\0\7\14\1\0\7\14\1\0\3\117\1\0"+
-    "\10\14\2\0\2\14\2\0\26\14\1\0\7\14\1\0\1\14\3\0"+
-    "\4\14\2\0\1\117\1\14\7\117\2\0\2\117\2\0\3\117\1\14"+
-    "\10\0\1\117\4\0\2\14\1\0\3\14\2\117\2\0\12\4\2\14"+
-    "\17\0\3\117\1\0\6\14\4\0\2\14\2\0\26\14\1\0\7\14"+
-    "\1\0\2\14\1\0\2\14\1\0\2\14\2\0\1\117\1\0\5\117"+
-    "\4\0\2\117\2\0\3\117\3\0\1\117\7\0\4\14\1\0\1\14"+
-    "\7\0\12\4\2\117\3\14\1\117\13\0\3\117\1\0\11\14\1\0"+
-    "\3\14\1\0\26\14\1\0\7\14\1\0\2\14\1\0\5\14\2\0"+
-    "\1\117\1\14\10\117\1\0\3\117\1\0\3\117\2\0\1\14\17\0"+
-    "\2\14\2\117\2\0\12\4\21\0\3\117\1\0\10\14\2\0\2\14"+
-    "\2\0\26\14\1\0\7\14\1\0\2\14\1\0\5\14\2\0\1\117"+
-    "\1\14\7\117\2\0\2\117\2\0\3\117\10\0\2\117\4\0\2\14"+
-    "\1\0\3\14\2\117\2\0\12\4\1\0\1\14\20\0\1\117\1\14"+
-    "\1\0\6\14\3\0\3\14\1\0\4\14\3\0\2\14\1\0\1\14"+
-    "\1\0\2\14\3\0\2\14\3\0\3\14\3\0\14\14\4\0\5\117"+
-    "\3\0\3\117\1\0\4\117\2\0\1\14\6\0\1\117\16\0\12\4"+
-    "\21\0\3\117\1\0\10\14\1\0\3\14\1\0\27\14\1\0\12\14"+
-    "\1\0\5\14\3\0\1\14\7\117\1\0\3\117\1\0\4\117\7\0"+
-    "\2\117\1\0\2\14\6\0\2\14\2\117\2\0\12\4\22\0\2\117"+
-    "\1\0\10\14\1\0\3\14\1\0\27\14\1\0\12\14\1\0\5\14"+
-    "\2\0\1\117\1\14\7\117\1\0\3\117\1\0\4\117\7\0\2\117"+
-    "\7\0\1\14\1\0\2\14\2\117\2\0\12\4\1\0\2\14\17\0"+
-    "\2\117\1\0\10\14\1\0\3\14\1\0\51\14\2\0\1\14\7\117"+
-    "\1\0\3\117\1\0\4\117\1\14\10\0\1\117\10\0\2\14\2\117"+
-    "\2\0\12\4\12\0\6\14\2\0\2\117\1\0\22\14\3\0\30\14"+
-    "\1\0\11\14\1\0\1\14\2\0\7\14\3\0\1\117\4\0\6\117"+
-    "\1\0\1\117\1\0\10\117\22\0\2\117\15\0\60\123\1\23\2\123"+
-    "\7\23\5\0\7\123\10\23\1\0\12\4\47\0\2\123\1\0\1\123"+
-    "\2\0\2\123\1\0\1\123\2\0\1\123\6\0\4\123\1\0\7\123"+
-    "\1\0\3\123\1\0\1\123\1\0\1\123\2\0\2\123\1\0\4\123"+
-    "\1\23\2\123\6\23\1\0\2\23\1\123\2\0\5\123\1\0\1\123"+
-    "\1\0\6\23\2\0\12\4\2\0\4\123\40\0\1\14\27\0\2\117"+
-    "\6\0\12\4\13\0\1\117\1\0\1\117\1\0\1\117\4\0\2\117"+
-    "\10\14\1\0\44\14\4\0\24\117\1\0\2\117\5\14\13\117\1\0"+
-    "\44\117\11\0\1\117\71\0\53\123\24\23\1\123\12\4\6\0\6\123"+
-    "\4\23\4\123\3\23\1\123\3\23\2\123\7\23\3\123\4\23\15\123"+
-    "\14\23\1\123\1\23\12\4\4\23\2\22\46\14\1\0\1\14\5\0"+
-    "\1\14\2\0\53\14\1\0\4\14\u0100\2\111\14\1\0\4\14\2\0"+
-    "\7\14\1\0\1\14\1\0\4\14\2\0\51\14\1\0\4\14\2\0"+
-    "\41\14\1\0\4\14\2\0\7\14\1\0\1\14\1\0\4\14\2\0"+
-    "\17\14\1\0\71\14\1\0\4\14\2\0\103\14\2\0\3\117\40\0"+
-    "\20\14\20\0\125\14\14\0\u026c\14\2\0\21\14\1\0\32\14\5\0"+
-    "\113\14\3\0\3\14\17\0\15\14\1\0\4\14\3\117\13\0\22\14"+
-    "\3\117\13\0\22\14\2\117\14\0\15\14\1\0\3\14\1\0\2\117"+
-    "\14\0\64\123\40\23\3\0\1\123\4\0\1\123\1\23\2\0\12\4"+
-    "\41\0\3\117\1\3\1\0\12\4\6\0\130\14\10\0\51\14\1\117"+
-    "\1\14\5\0\106\14\12\0\35\14\3\0\14\117\4\0\14\117\12\0"+
-    "\12\4\36\123\2\0\5\123\13\0\54\123\4\0\21\23\7\123\2\23"+
-    "\6\0\12\4\1\22\3\0\2\22\40\0\27\14\5\117\4\0\65\123"+
-    "\12\23\1\0\35\23\2\0\1\117\12\4\6\0\12\4\6\0\7\22"+
-    "\1\123\6\22\122\0\5\117\57\14\21\117\7\14\4\0\12\4\21\0"+
-    "\11\117\14\0\3\117\36\14\15\117\2\14\12\4\54\14\16\117\14\0"+
-    "\44\14\24\117\10\0\12\4\3\0\3\14\12\4\44\14\122\0\3\117"+
-    "\1\0\25\117\4\14\1\117\4\14\3\117\2\14\11\0\300\14\47\117"+
-    "\25\0\4\117\u0116\14\2\0\6\14\2\0\46\14\2\0\6\14\2\0"+
-    "\10\14\1\0\1\14\1\0\1\14\1\0\1\14\1\0\37\14\2\0"+
-    "\65\14\1\0\7\14\1\0\1\14\3\0\3\14\1\0\7\14\3\0"+
-    "\4\14\2\0\6\14\4\0\15\14\5\0\3\14\1\0\7\14\17\0"+
-    "\4\3\10\0\2\11\12\0\1\11\2\0\1\7\2\0\5\3\20\0"+
-    "\2\12\3\0\1\10\17\0\1\12\13\0\5\3\1\0\12\3\1\0"+
-    "\1\14\15\0\1\14\20\0\15\14\63\0\41\117\21\0\1\14\4\0"+
-    "\1\14\2\0\12\14\1\0\1\14\3\0\5\14\6\0\1\14\1\0"+
-    "\1\14\1\0\1\14\1\0\4\14\1\0\13\14\2\0\4\14\5\0"+
-    "\5\14\4\0\1\14\21\0\51\14\u032d\0\64\14\u0716\0\57\14\1\0"+
-    "\57\14\1\0\205\14\6\0\4\14\3\117\2\14\14\0\46\14\1\0"+
-    "\1\14\5\0\1\14\2\0\70\14\7\0\1\14\17\0\1\117\27\14"+
-    "\11\0\7\14\1\0\7\14\1\0\7\14\1\0\7\14\1\0\7\14"+
-    "\1\0\7\14\1\0\7\14\1\0\7\14\1\0\40\117\57\0\1\14"+
-    "\120\0\32\13\1\0\131\13\14\0\326\13\57\0\1\14\1\116\1\121"+
-    "\31\0\11\121\6\117\1\0\5\120\2\0\3\121\1\14\1\14\4\0"+
-    "\126\122\2\0\2\117\2\6\3\122\1\6\132\120\1\0\4\120\5\0"+
-    "\51\14\3\0\136\2\21\0\33\14\65\0\20\120\320\0\57\6\1\0"+
-    "\130\6\250\0\u19b6\121\112\0\u51cd\121\63\0\u048d\14\103\0\56\14\2\0"+
-    "\u010d\14\3\0\20\14\12\4\2\14\24\0\57\14\4\117\1\0\12\117"+
-    "\1\0\31\14\7\0\1\117\120\14\2\117\45\0\11\14\2\0\147\14"+
-    "\2\0\4\14\1\0\4\14\14\0\13\14\115\0\12\14\1\117\3\14"+
-    "\1\117\4\14\1\117\27\14\5\117\30\0\64\14\14\0\2\117\62\14"+
-    "\21\117\13\0\12\4\6\0\22\117\6\14\3\0\1\14\4\0\12\4"+
-    "\34\14\10\117\2\0\27\14\15\117\14\0\35\2\3\0\4\117\57\14"+
-    "\16\117\16\0\1\14\12\4\46\0\51\14\16\117\11\0\3\14\1\117"+
-    "\10\14\2\117\2\0\12\4\6\0\27\123\3\22\1\123\1\23\4\0"+
-    "\60\123\1\23\1\123\3\23\2\123\2\23\5\123\2\23\1\123\1\23"+
-    "\1\123\30\0\3\123\2\22\13\14\5\117\2\0\3\14\2\117\12\0"+
-    "\6\14\2\0\6\14\2\0\6\14\11\0\7\14\1\0\7\14\221\0"+
-    "\43\14\10\117\1\0\2\117\2\0\12\4\6\0\u2ba4\2\14\0\27\2"+
-    "\4\0\61\2\u2104\0\u016e\121\2\0\152\121\46\0\7\14\14\0\5\14"+
-    "\5\0\1\20\1\117\12\20\1\0\15\20\1\0\5\20\1\0\1\20"+
-    "\1\0\2\20\1\0\2\20\1\0\12\20\142\14\41\0\u016b\14\22\0"+
-    "\100\14\2\0\66\14\50\0\14\14\4\0\20\117\1\10\2\0\1\7"+
-    "\1\10\13\0\7\117\14\0\2\12\30\0\3\12\1\10\1\0\1\11"+
-    "\1\0\1\10\1\7\32\0\5\14\1\0\207\14\2\0\1\3\7\0"+
-    "\1\11\4\0\1\10\1\0\1\11\1\0\12\4\1\7\1\10\5\0"+
-    "\32\14\4\0\1\12\1\0\32\14\13\0\70\120\2\117\37\2\3\0"+
-    "\6\2\2\0\6\2\2\0\6\2\2\0\3\2\34\0\3\3\4\0"+
-    "\14\14\1\0\32\14\1\0\23\14\1\0\2\14\1\0\17\14\2\0"+
-    "\16\14\42\0\173\14\105\0\65\14\210\0\1\117\202\0\35\14\3\0"+
-    "\61\14\57\0\37\14\21\0\33\14\65\0\36\14\2\0\44\14\4\0"+
-    "\10\14\1\0\5\14\52\0\236\14\2\0\12\4\u0356\0\6\14\2\0"+
-    "\1\14\1\0\54\14\1\0\2\14\3\0\1\14\2\0\27\14\252\0"+
-    "\26\14\12\0\32\14\106\0\70\14\6\0\2\14\100\0\1\14\3\117"+
-    "\1\0\2\117\5\0\4\117\4\14\1\0\3\14\1\0\33\14\4\0"+
-    "\3\117\4\0\1\117\40\0\35\14\203\0\66\14\12\0\26\14\12\0"+
-    "\23\14\215\0\111\14\u03b7\0\3\117\65\14\17\117\37\0\12\4\20\0"+
-    "\3\117\55\14\13\117\2\0\1\3\22\0\31\14\7\0\12\4\6\0"+
-    "\3\117\44\14\16\117\1\0\12\4\100\0\3\117\60\14\16\117\4\14"+
-    "\13\0\12\4\u04a6\0\53\14\15\117\10\0\12\4\u0936\0\u036f\14\221\0"+
-    "\143\14\u0b9d\0\u042f\14\u33d1\0\u0239\14\u04c7\0\105\14\13\0\1\14\56\117"+
-    "\20\0\4\117\15\14\u4060\0\1\120\1\122\u2163\0\5\117\3\0\6\117"+
-    "\10\3\10\117\2\0\7\117\36\0\4\117\224\0\3\117\u01bb\0\125\14"+
-    "\1\0\107\14\1\0\2\14\2\0\1\14\2\0\2\14\2\0\4\14"+
-    "\1\0\14\14\1\0\1\14\1\0\7\14\1\0\101\14\1\0\4\14"+
-    "\2\0\10\14\1\0\7\14\1\0\34\14\1\0\4\14\1\0\5\14"+
-    "\1\0\1\14\3\0\7\14\1\0\u0154\14\2\0\31\14\1\0\31\14"+
-    "\1\0\37\14\1\0\31\14\1\0\37\14\1\0\31\14\1\0\37\14"+
-    "\1\0\31\14\1\0\37\14\1\0\31\14\1\0\10\14\2\0\62\4"+
-    "\u1600\0\4\14\1\0\33\14\1\0\2\14\1\0\1\14\2\0\1\14"+
-    "\1\0\12\14\1\0\4\14\1\0\1\14\1\0\1\14\6\0\1\14"+
-    "\4\0\1\14\1\0\1\14\1\0\1\14\1\0\3\14\1\0\2\14"+
-    "\1\0\1\14\2\0\1\14\1\0\1\14\1\0\1\14\1\0\1\14"+
-    "\1\0\1\14\1\0\2\14\1\0\1\14\2\0\4\14\1\0\7\14"+
-    "\1\0\4\14\1\0\4\14\1\0\1\14\1\0\12\14\1\0\21\14"+
-    "\5\0\3\14\1\0\5\14\1\0\21\14\u032a\0\32\21\1\15\u0dff\0"+
-    "\ua6d7\121\51\0\u1035\121\13\0\336\121\u3fe2\0\u021e\121\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
-    "\1\3\36\0\140\3\200\0\360\117\uffff\0\uffff\0\ufe12\0";
+    "\1\125\10\123\2\125\2\123\1\125\23\123\1\126\1\34\1\7\1\126"+
+    "\1\110\1\106\1\33\2\111\1\112\1\126\1\113\1\73\1\40\1\116"+
+    "\1\101\1\74\1\77\1\76\1\75\1\22\1\104\1\102\1\103\1\100"+
+    "\1\105\1\121\1\123\1\122\1\123\1\115\1\114\1\41\1\44\1\53"+
+    "\1\56\1\52\1\64\1\55\1\46\1\51\1\72\1\65\1\54\1\60"+
+    "\1\62\1\47\1\43\1\70\1\42\1\63\1\45\1\57\1\50\1\71"+
+    "\1\67\1\61\1\66\1\117\1\124\1\120\1\127\1\107\1\127\1\41"+
+    "\1\44\1\53\1\56\1\52\1\64\1\55\1\46\1\51\1\72\1\65"+
+    "\1\54\1\60\1\62\1\47\1\43\1\70\1\42\1\63\1\45\1\57"+
+    "\1\50\1\71\1\67\1\61\1\66\3\127\1\106\1\130\51\0\1\5"+
+    "\1\31\2\0\1\1\1\5\6\0\1\31\1\0\1\24\2\0\1\31"+
+    "\5\0\27\31\1\0\37\31\1\0\u01ca\31\4\0\14\31\5\0\1\24"+
+    "\10\0\5\31\7\0\1\31\1\0\1\31\21\0\160\132\5\31\1\0"+
+    "\2\31\2\0\4\31\1\25\1\31\6\0\1\31\1\24\3\31\1\0"+
+    "\1\31\1\0\24\31\1\0\123\31\1\0\213\31\1\0\7\132\246\31"+
+    "\1\0\46\31\2\0\1\31\7\0\47\31\1\0\1\25\7\0\55\132"+
+    "\1\0\1\132\1\0\2\132\1\0\2\132\1\0\1\132\10\0\33\35"+
+    "\5\0\3\35\1\15\1\24\13\0\6\1\6\0\2\25\2\0\13\132"+
+    "\1\0\1\1\3\0\53\31\25\132\12\20\1\0\1\21\1\25\1\0"+
+    "\2\31\1\132\143\31\1\0\1\31\7\132\1\1\1\0\6\132\2\31"+
+    "\2\132\1\0\4\132\2\31\12\20\3\31\2\0\1\31\17\0\1\1"+
+    "\1\31\1\132\36\31\33\132\2\0\131\31\13\132\1\31\16\0\12\20"+
+    "\41\31\11\132\2\31\2\0\1\25\1\0\1\31\5\0\26\31\4\132"+
+    "\1\31\11\132\1\31\3\132\1\31\5\132\22\0\31\31\3\132\104\0"+
+    "\25\31\1\0\10\31\26\0\16\132\1\1\41\132\66\31\3\132\1\31"+
+    "\22\132\1\31\7\132\12\31\2\132\2\0\12\20\1\0\20\31\3\132"+
+    "\1\0\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\1\31"+
+    "\3\0\4\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132"+
+    "\1\31\10\0\1\132\4\0\2\31\1\0\3\31\2\132\2\0\12\20"+
+    "\2\31\17\0\3\132\1\0\6\31\4\0\2\31\2\0\26\31\1\0"+
+    "\7\31\1\0\2\31\1\0\2\31\1\0\2\31\2\0\1\132\1\0"+
+    "\5\132\4\0\2\132\2\0\3\132\3\0\1\132\7\0\4\31\1\0"+
+    "\1\31\7\0\12\20\2\132\3\31\1\132\13\0\3\132\1\0\11\31"+
+    "\1\0\3\31\1\0\26\31\1\0\7\31\1\0\2\31\1\0\5\31"+
+    "\2\0\1\132\1\31\10\132\1\0\3\132\1\0\3\132\2\0\1\31"+
+    "\17\0\2\31\2\132\2\0\12\20\11\0\1\31\7\0\3\132\1\0"+
+    "\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\2\31\1\0"+
+    "\5\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132\10\0"+
+    "\2\132\4\0\2\31\1\0\3\31\2\132\2\0\12\20\1\0\1\31"+
+    "\20\0\1\132\1\31\1\0\6\31\3\0\3\31\1\0\4\31\3\0"+
+    "\2\31\1\0\1\31\1\0\2\31\3\0\2\31\3\0\3\31\3\0"+
+    "\14\31\4\0\5\132\3\0\3\132\1\0\4\132\2\0\1\31\6\0"+
+    "\1\132\16\0\12\20\20\0\4\132\1\0\10\31\1\0\3\31\1\0"+
+    "\27\31\1\0\20\31\3\0\1\31\7\132\1\0\3\132\1\0\4\132"+
+    "\7\0\2\132\1\0\3\31\5\0\2\31\2\132\2\0\12\20\20\0"+
+    "\1\31\3\132\1\0\10\31\1\0\3\31\1\0\27\31\1\0\12\31"+
+    "\1\0\5\31\2\0\1\132\1\31\7\132\1\0\3\132\1\0\4\132"+
+    "\7\0\2\132\7\0\1\31\1\0\2\31\2\132\2\0\12\20\1\0"+
+    "\2\31\16\0\3\132\1\0\10\31\1\0\3\31\1\0\51\31\2\0"+
+    "\1\31\7\132\1\0\3\132\1\0\4\132\1\31\5\0\3\31\1\132"+
+    "\7\0\3\31\2\132\2\0\12\20\12\0\6\31\2\0\2\132\1\0"+
+    "\22\31\3\0\30\31\1\0\11\31\1\0\1\31\2\0\7\31\3\0"+
+    "\1\132\4\0\6\132\1\0\1\132\1\0\10\132\6\0\12\20\2\0"+
+    "\2\132\15\0\60\137\1\37\2\137\7\37\5\0\7\137\10\37\1\0"+
+    "\12\20\47\0\2\137\1\0\1\137\2\0\2\137\1\0\1\137\2\0"+
+    "\1\137\6\0\4\137\1\0\7\137\1\0\3\137\1\0\1\137\1\0"+
+    "\1\137\2\0\2\137\1\0\4\137\1\37\2\137\6\37\1\0\2\37"+
+    "\1\137\2\0\5\137\1\0\1\137\1\0\6\37\2\0\12\20\2\0"+
+    "\4\137\40\0\1\31\27\0\2\132\6\0\12\20\13\0\1\132\1\0"+
+    "\1\132\1\0\1\132\4\0\2\132\10\31\1\0\44\31\4\0\24\132"+
+    "\1\0\2\132\5\31\13\132\1\0\44\132\11\0\1\132\71\0\53\137"+
+    "\24\37\1\137\12\20\6\0\6\137\4\37\4\137\3\37\1\137\3\37"+
+    "\2\137\7\37\3\137\4\37\15\137\14\37\1\137\1\37\12\20\4\37"+
+    "\2\36\46\31\1\0\1\31\5\0\1\31\2\0\53\31\1\0\4\31"+
+    "\u0100\17\111\31\1\0\4\31\2\0\7\31\1\0\1\31\1\0\4\31"+
+    "\2\0\51\31\1\0\4\31\2\0\41\31\1\0\4\31\2\0\7\31"+
+    "\1\0\1\31\1\0\4\31\2\0\17\31\1\0\71\31\1\0\4\31"+
+    "\2\0\103\31\2\0\3\132\40\0\20\31\20\0\126\31\2\0\6\31"+
+    "\3\0\u026c\31\2\0\21\31\1\0\32\31\5\0\113\31\3\0\13\31"+
+    "\7\0\15\31\1\0\4\31\3\132\13\0\22\31\3\132\13\0\22\31"+
+    "\2\132\14\0\15\31\1\0\3\31\1\0\2\132\14\0\64\137\40\37"+
+    "\3\0\1\137\4\0\1\137\1\37\2\0\12\20\41\0\3\132\1\1"+
+    "\1\0\12\20\6\0\130\31\10\0\5\31\2\132\42\31\1\132\1\31"+
+    "\5\0\106\31\12\0\37\31\1\0\14\132\4\0\14\132\12\0\12\20"+
+    "\36\137\2\0\5\137\13\0\54\137\4\0\32\137\6\0\12\20\1\36"+
+    "\3\0\2\36\40\0\27\31\5\132\4\0\65\137\12\37\1\0\35\37"+
+    "\2\0\1\132\12\20\6\0\12\20\6\0\7\36\1\137\6\36\2\0"+
+    "\17\132\101\0\5\132\57\31\21\132\7\31\4\0\12\20\21\0\11\132"+
+    "\14\0\3\132\36\31\15\132\2\31\12\20\54\31\16\132\14\0\44\31"+
+    "\24\132\10\0\12\20\3\0\3\31\12\20\44\31\2\0\11\31\107\0"+
+    "\3\132\1\0\25\132\4\31\1\132\4\31\3\132\2\31\1\0\2\132"+
+    "\6\0\300\31\66\132\5\0\5\132\u0116\31\2\0\6\31\2\0\46\31"+
+    "\2\0\6\31\2\0\10\31\1\0\1\31\1\0\1\31\1\0\1\31"+
+    "\1\0\37\31\2\0\65\31\1\0\7\31\1\0\1\31\3\0\3\31"+
+    "\1\0\7\31\3\0\4\31\2\0\6\31\4\0\15\31\5\0\3\31"+
+    "\1\0\7\31\17\0\1\1\1\12\2\1\10\0\2\26\12\0\1\26"+
+    "\2\0\1\24\2\0\5\1\1\27\14\0\1\5\2\0\2\134\3\0"+
+    "\1\25\4\0\1\5\12\0\1\134\13\0\5\1\1\0\12\1\1\0"+
+    "\1\31\15\0\1\31\20\0\15\31\63\0\23\132\1\10\15\132\21\0"+
+    "\1\31\4\0\1\31\2\0\12\31\1\0\1\31\3\0\5\31\4\0"+
+    "\1\5\1\0\1\31\1\0\1\31\1\0\1\31\1\0\4\31\1\0"+
+    "\12\31\1\16\2\0\4\31\5\0\5\31\4\0\1\31\21\0\51\31"+
+    "\13\0\6\5\17\0\2\5\u016f\0\2\5\14\0\1\5\137\0\1\5"+
+    "\106\0\1\5\31\0\13\5\4\0\3\5\273\0\14\31\1\16\47\31"+
+    "\300\0\2\5\12\0\1\5\11\0\1\5\72\0\4\5\1\0\5\5"+
+    "\1\5\1\0\7\5\1\5\2\5\1\5\1\5\1\0\2\5\2\5"+
+    "\1\5\4\5\1\4\2\5\1\5\1\5\2\5\2\5\1\5\3\5"+
+    "\1\5\3\5\2\5\10\5\3\5\5\5\1\5\1\5\1\5\5\5"+
+    "\14\5\13\5\2\5\2\5\1\5\1\5\2\5\1\5\1\5\22\5"+
+    "\1\5\2\5\2\5\6\5\12\0\2\5\6\5\1\5\1\5\1\5"+
+    "\2\5\3\5\2\5\10\5\2\5\4\5\2\5\13\5\2\5\5\5"+
+    "\2\5\2\5\1\5\5\5\2\5\1\5\1\5\1\5\2\5\24\5"+
+    "\2\5\5\5\6\5\1\5\2\5\1\4\1\5\2\5\1\5\4\5"+
+    "\1\5\2\5\1\5\2\0\2\5\4\4\1\5\1\5\2\5\1\5"+
+    "\1\0\1\5\1\0\1\5\6\0\1\5\3\0\1\5\6\0\1\5"+
+    "\12\0\2\5\17\0\1\5\2\0\1\5\4\0\1\5\1\0\1\5"+
+    "\4\0\3\5\1\0\1\5\13\0\2\5\3\5\55\0\3\5\11\0"+
+    "\1\5\16\0\1\5\16\0\1\5\u0174\0\2\5\u01cf\0\3\5\23\0"+
+    "\2\5\63\0\1\5\4\0\1\5\252\0\57\31\1\0\57\31\1\0"+
+    "\205\31\6\0\4\31\3\132\2\31\14\0\46\31\1\0\1\31\5\0"+
+    "\1\31\2\0\70\31\7\0\1\31\17\0\1\132\27\31\11\0\7\31"+
+    "\1\0\7\31\1\0\7\31\1\0\7\31\1\0\7\31\1\0\7\31"+
+    "\1\0\7\31\1\0\7\31\1\0\40\132\57\0\1\31\120\0\32\30"+
+    "\1\0\131\30\14\0\326\30\57\0\1\31\1\131\1\135\31\0\11\135"+
+    "\6\132\1\5\5\133\2\0\3\135\1\31\1\31\1\5\3\0\126\136"+
+    "\2\0\2\132\2\23\3\136\1\23\132\133\1\0\4\133\5\0\51\31"+
+    "\3\0\136\17\21\0\33\31\65\0\20\133\227\0\1\5\1\0\1\5"+
+    "\66\0\57\23\1\0\130\23\250\0\u19b6\135\112\0\u51d6\135\52\0\u048d\31"+
+    "\103\0\56\31\2\0\u010d\31\3\0\20\31\12\20\2\31\24\0\57\31"+
+    "\4\132\1\0\12\132\1\0\37\31\2\132\120\31\2\132\45\0\11\31"+
+    "\2\0\147\31\2\0\44\31\1\0\10\31\77\0\13\31\1\132\3\31"+
+    "\1\132\4\31\1\132\27\31\5\132\30\0\64\31\14\0\2\132\62\31"+
+    "\22\132\12\0\12\20\6\0\22\132\6\31\3\0\1\31\1\0\1\31"+
+    "\2\0\12\20\34\31\10\132\2\0\27\31\15\132\14\0\35\17\3\0"+
+    "\4\132\57\31\16\132\16\0\1\31\12\20\6\0\5\137\1\37\12\137"+
+    "\12\20\5\137\1\0\51\31\16\132\11\0\3\31\1\132\10\31\2\132"+
+    "\2\0\12\20\6\0\27\137\3\36\1\137\3\37\62\137\1\37\1\137"+
+    "\3\37\2\137\2\37\5\137\2\37\1\137\1\37\1\137\30\0\3\137"+
+    "\2\36\13\31\5\132\2\0\3\31\2\132\12\0\6\31\2\0\6\31"+
+    "\2\0\6\31\11\0\7\31\1\0\7\31\1\0\53\31\1\0\12\31"+
+    "\12\0\163\31\10\132\1\0\2\132\2\0\12\20\6\0\u2ba4\17\14\0"+
+    "\27\17\4\0\61\17\u2104\0\u016e\135\2\0\152\135\46\0\7\31\14\0"+
+    "\5\31\5\0\1\35\1\132\12\35\1\0\15\35\1\0\5\35\1\0"+
+    "\1\35\1\0\2\35\1\0\2\35\1\0\12\35\142\31\41\0\u016b\31"+
+    "\22\0\100\31\2\0\66\31\50\0\14\31\4\0\16\132\1\6\1\11"+
+    "\1\25\2\0\1\24\1\25\13\0\20\132\3\0\2\134\30\0\3\134"+
+    "\1\25\1\0\1\26\1\0\1\25\1\24\32\0\5\31\1\0\207\31"+
+    "\2\0\1\1\7\0\1\26\4\0\1\25\1\0\1\26\1\0\12\20"+
+    "\1\24\1\25\5\0\32\31\4\0\1\134\1\0\32\31\13\0\70\133"+
+    "\2\132\37\17\3\0\6\17\2\0\6\17\2\0\6\17\2\0\3\17"+
+    "\34\0\3\1\4\0\14\31\1\0\32\31\1\0\23\31\1\0\2\31"+
+    "\1\0\17\31\2\0\16\31\42\0\173\31\105\0\65\31\210\0\1\132"+
+    "\202\0\35\31\3\0\61\31\17\0\1\132\37\0\40\31\20\0\33\31"+
+    "\5\0\46\31\5\132\5\0\36\31\2\0\44\31\4\0\10\31\1\0"+
+    "\5\31\52\0\236\31\2\0\12\20\6\0\44\31\4\0\44\31\4\0"+
+    "\50\31\10\0\64\31\234\0\u0137\31\11\0\26\31\12\0\10\31\230\0"+
+    "\6\31\2\0\1\31\1\0\54\31\1\0\2\31\3\0\1\31\2\0"+
+    "\27\31\12\0\27\31\11\0\37\31\101\0\23\31\1\0\2\31\12\0"+
+    "\26\31\12\0\32\31\106\0\70\31\6\0\2\31\100\0\1\31\3\132"+
+    "\1\0\2\132\5\0\4\132\4\31\1\0\3\31\1\0\33\31\4\0"+
+    "\3\132\4\0\1\132\40\0\35\31\3\0\35\31\43\0\10\31\1\0"+
+    "\34\31\2\132\31\0\66\31\12\0\26\31\12\0\23\31\15\0\22\31"+
+    "\156\0\111\31\67\0\63\31\15\0\63\31\u030d\0\3\132\65\31\17\132"+
+    "\37\0\12\20\17\0\4\132\55\31\13\132\2\0\1\1\22\0\31\31"+
+    "\7\0\12\20\6\0\3\132\44\31\16\132\1\0\12\20\20\0\43\31"+
+    "\1\132\2\0\1\31\11\0\3\132\60\31\16\132\4\31\5\0\3\132"+
+    "\3\0\12\20\1\31\1\0\1\31\43\0\22\31\1\0\31\31\14\132"+
+    "\6\0\1\132\101\0\7\31\1\0\1\31\1\0\4\31\1\0\17\31"+
+    "\1\0\12\31\7\0\57\31\14\132\5\0\12\20\6\0\4\132\1\0"+
+    "\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\2\31\1\0"+
+    "\5\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132\2\0"+
+    "\1\31\6\0\1\132\5\0\5\31\2\132\2\0\7\132\3\0\5\132"+
+    "\213\0\65\31\22\132\4\31\5\0\12\20\46\0\60\31\24\132\2\31"+
+    "\1\0\1\31\10\0\12\20\246\0\57\31\7\132\2\0\11\132\27\0"+
+    "\4\31\2\132\42\0\60\31\21\132\3\0\1\31\13\0\12\20\46\0"+
+    "\53\31\15\132\10\0\12\20\66\0\32\137\3\0\17\37\4\0\12\20"+
+    "\2\36\3\0\1\36\u0160\0\100\31\12\20\25\0\1\31\u01c0\0\71\31"+
+    "\u0107\0\11\31\1\0\45\31\10\132\1\0\10\132\1\31\17\0\12\20"+
+    "\30\0\36\31\2\0\26\132\1\0\16\132\u0349\0\u039a\31\146\0\157\31"+
+    "\21\0\304\31\u0abc\0\u042f\31\u0fd1\0\u0247\31\u21b9\0\u0239\31\7\0\37\31"+
+    "\1\0\12\20\146\0\36\31\2\0\5\132\13\0\60\31\7\132\11\0"+
+    "\4\31\14\0\12\20\11\0\25\31\5\0\23\31\u0370\0\105\31\13\0"+
+    "\1\31\56\132\20\0\4\132\15\31\100\0\1\31\37\0\u17ed\131\23\0"+
+    "\u02f3\131\u250d\0\1\133\1\136\u0bfe\0\153\31\5\0\15\31\3\0\11\31"+
+    "\7\0\12\31\3\0\2\132\1\0\4\1\u14c1\0\5\132\3\0\6\132"+
+    "\10\1\10\132\2\0\7\132\36\0\4\132\224\0\3\132\u01bb\0\125\31"+
+    "\1\0\107\31\1\0\2\31\2\0\1\31\2\0\2\31\2\0\4\31"+
+    "\1\0\14\31\1\0\1\31\1\0\7\31\1\0\101\31\1\0\4\31"+
+    "\2\0\10\31\1\0\7\31\1\0\34\31\1\0\4\31\1\0\5\31"+
+    "\1\0\1\31\3\0\7\31\1\0\u0154\31\2\0\31\31\1\0\31\31"+
+    "\1\0\37\31\1\0\31\31\1\0\37\31\1\0\31\31\1\0\37\31"+
+    "\1\0\31\31\1\0\37\31\1\0\31\31\1\0\10\31\2\0\62\20"+
+    "\u0200\0\67\132\4\0\62\132\10\0\1\132\16\0\1\132\26\0\5\132"+
+    "\1\0\17\132\u0550\0\7\132\1\0\21\132\2\0\7\132\1\0\2\132"+
+    "\1\0\5\132\u07d5\0\305\31\13\0\7\132\51\0\104\31\7\132\5\0"+
+    "\12\20\u04a6\0\4\31\1\0\33\31\1\0\2\31\1\0\1\31\2\0"+
+    "\1\31\1\0\12\31\1\0\4\31\1\0\1\31\1\0\1\31\6\0"+
+    "\1\31\4\0\1\31\1\0\1\31\1\0\1\31\1\0\3\31\1\0"+
+    "\2\31\1\0\1\31\2\0\1\31\1\0\1\31\1\0\1\31\1\0"+
+    "\1\31\1\0\1\31\1\0\2\31\1\0\1\31\2\0\4\31\1\0"+
+    "\7\31\1\0\4\31\1\0\4\31\1\0\1\31\1\0\12\31\1\0"+
+    "\21\31\5\0\3\31\1\0\5\31\1\0\21\31\u0144\0\4\5\1\5"+
+    "\312\5\1\5\60\5\15\0\3\5\37\0\1\5\32\31\6\0\32\31"+
+    "\2\0\4\5\2\16\14\31\2\16\12\31\4\0\1\5\2\0\12\5"+
+    "\22\0\71\5\32\2\1\32\2\5\15\5\12\0\1\5\24\0\1\5"+
+    "\2\0\11\5\1\0\4\5\11\0\7\5\2\5\256\5\42\5\2\5"+
+    "\141\5\1\4\16\5\2\5\2\5\1\5\3\5\2\5\44\5\3\4"+
+    "\2\5\1\4\2\5\3\4\44\5\2\5\3\5\1\5\4\5\5\3"+
+    "\102\5\2\4\2\5\13\4\25\5\4\4\4\5\1\4\1\5\11\4"+
+    "\3\5\1\4\4\5\3\4\1\5\3\4\42\5\1\4\123\5\1\5"+
+    "\77\5\10\0\3\5\6\5\1\5\30\5\7\5\2\5\2\5\1\5"+
+    "\2\4\4\5\1\4\14\5\1\5\2\5\4\5\2\5\1\4\4\5"+
+    "\2\4\15\5\2\5\2\5\1\5\10\5\2\5\11\5\1\5\5\5"+
+    "\3\5\14\5\3\5\10\5\3\5\2\5\1\5\1\5\1\5\4\5"+
+    "\1\5\6\5\1\5\3\5\1\5\6\5\113\5\3\4\3\5\5\4"+
+    "\60\0\43\5\1\4\20\5\3\4\11\5\1\4\5\5\5\5\1\5"+
+    "\1\4\6\5\15\5\6\5\3\5\1\5\1\5\2\5\3\5\1\5"+
+    "\2\5\7\5\6\5\164\0\14\5\125\0\53\5\14\0\4\5\70\0"+
+    "\10\5\12\0\6\5\50\0\10\5\36\0\122\5\14\0\4\5\10\5"+
+    "\5\4\1\5\2\4\6\5\1\4\11\5\12\4\1\5\1\0\1\5"+
+    "\2\4\1\5\6\5\1\0\52\5\2\5\4\5\3\5\1\5\1\5"+
+    "\47\5\15\5\5\5\2\4\1\5\2\4\6\5\3\5\15\5\1\5"+
+    "\15\4\42\5\u05fe\5\2\0\ua6d7\135\51\0\u1035\135\13\0\336\135\2\0"+
+    "\u1682\135\u295e\0\u021e\135\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
+    "\1\1\36\0\137\13\1\14\200\0\360\132\uffff\0\uffff\0\ufe12\0";
 
   /** 
    * Translates characters to character classes
@@ -231,27 +297,85 @@ public final class UAX29URLEmailTokenizerImpl {
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\2\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
-    "\1\7\2\1\1\2\1\1\1\10\4\2\3\4\2\1"+
-    "\4\2\3\4\1\1\1\2\1\0\1\2\1\0\1\4"+
-    "\1\0\1\2\6\0\1\2\2\0\1\1\3\0\6\2"+
-    "\2\0\3\4\1\2\1\4\5\0\5\2\1\0\2\4"+
-    "\6\0\32\2\3\0\5\2\32\0\4\4\5\0\32\2"+
-    "\2\0\4\2\32\0\4\4\5\0\1\11\1\0\1\2"+
-    "\1\12\2\2\2\12\1\2\10\12\1\2\2\12\1\2"+
-    "\4\12\1\2\1\12\1\2\2\12\2\2\6\12\2\2"+
-    "\1\12\1\2\1\12\1\2\11\12\3\2\3\12\1\2"+
-    "\3\12\2\2\2\12\1\2\3\12\7\2\1\12\10\2"+
-    "\1\12\4\2\1\12\1\2\2\12\1\2\2\12\2\2"+
-    "\1\12\1\2\1\12\1\2\3\12\3\2\1\12\1\2"+
-    "\1\12\1\2\1\12\1\2\1\12\2\2\1\12\2\2"+
-    "\2\12\1\2\3\12\1\2\5\12\1\2\1\12\1\2"+
-    "\1\12\3\2\4\12\1\2\6\12\4\2\1\12\2\2"+
-    "\1\12\6\2\1\12\1\2\2\12\1\2\5\12\3\2"+
-    "\2\12\2\2\5\12\1\2\14\12\1\2\1\12\2\2"+
-    "\3\12\5\2\2\12\1\2\2\12\2\2\2\12\26\2"+
-    "\3\12\3\2\1\12\1\2\2\0\2\2\1\0\1\2"+
-    "\1\0\1\12\2\0\2\12\1\0\10\12\1\0\2\12"+
+    "\2\0\2\1\3\2\2\1\1\3\1\2\1\4\2\5"+
+    "\1\6\1\1\1\7\1\10\2\1\1\3\1\11\4\3"+
+    "\2\5\2\1\1\5\4\3\2\5\1\1\1\0\5\2"+
+    "\2\0\1\2\4\0\1\3\1\0\1\3\2\2\1\0"+
+    "\2\5\1\2\2\5\3\0\1\5\1\0\1\3\3\0"+
+    "\1\3\1\0\1\3\2\0\5\3\3\5\1\3\3\0"+
+    "\1\5\2\0\1\3\1\0\4\3\2\5\2\0\2\2"+
+    "\2\0\1\2\3\0\2\3\2\2\2\5\32\0\2\5"+
+    "\4\0\37\3\5\0\2\5\32\0\2\5\2\0\36\3"+
+    "\3\0\3\2\1\0\1\2\2\0\1\12\1\0\1\3"+
+    "\3\2\1\0\1\5\3\0\1\13\2\0\2\13\1\0"+
+    "\10\13\1\0\2\13\1\0\4\13\1\0\1\13\1\0"+
+    "\2\13\2\0\6\13\2\0\1\13\1\0\1\13\1\0"+
+    "\11\13\3\0\3\13\1\0\3\13\2\0\2\13\1\0"+
+    "\3\13\7\0\1\13\10\0\1\13\4\0\1\13\1\0"+
+    "\2\13\1\0\2\13\2\0\1\13\1\0\1\13\1\0"+
+    "\3\13\3\0\1\13\1\0\1\13\1\0\1\13\1\0"+
+    "\1\13\2\0\1\13\2\0\2\13\1\0\3\13\1\0"+
+    "\5\13\1\0\1\13\1\0\1\13\3\0\4\13\1\0"+
+    "\6\13\4\0\1\13\2\0\1\13\6\0\1\13\1\0"+
+    "\2\13\1\0\5\13\3\0\2\13\2\0\5\13\1\0"+
+    "\14\13\1\0\1\13\2\0\3\13\5\0\2\13\1\0"+
+    "\2\13\2\0\2\13\26\0\3\13\3\0\1\13\1\0"+
+    "\1\5\1\3\1\13\2\3\2\13\1\3\10\13\1\3"+
+    "\2\13\1\3\4\13\1\3\1\13\1\3\2\13\2\3"+
+    "\6\13\2\3\1\13\1\3\1\13\1\3\11\13\3\3"+
+    "\3\13\1\3\3\13\2\3\2\13\1\3\3\13\7\3"+
+    "\1\13\10\3\1\13\4\3\1\13\1\3\2\13\1\3"+
+    "\2\13\2\3\1\13\1\3\1\13\1\3\3\13\3\3"+
+    "\1\13\1\3\1\13\1\3\1\13\1\3\1\13\2\3"+
+    "\1\13\2\3\2\13\1\3\3\13\1\3\5\13\1\3"+
+    "\1\13\1\3\1\13\3\3\4\13\1\3\6\13\4\3"+
+    "\1\13\2\3\1\13\6\3\1\13\1\3\2\13\1\3"+
+    "\5\13\3\3\2\13\2\3\5\13\1\3\14\13\1\3"+
+    "\1\13\2\3\3\13\5\3\2\13\1\3\2\13\2\3"+
+    "\2\13\26\3\3\13\3\3\1\13\1\3\1\0\2\3"+
+    "\1\0\1\3\7\0\1\13\1\0\1\5\375\0\1\5"+
+    "\376\3\42\0\3\5\1\14\1\15\1\16\1\15\2\14"+
+    "\5\15\1\14\1\15\1\14\2\16\1\0\1\13\1\0"+
+    "\1\13\2\0\2\15\1\0\26\15\3\0\5\15\3\0"+
+    "\4\15\2\0\7\15\2\13\4\15\1\0\6\15\1\13"+
+    "\2\15\1\13\4\15\5\0\1\13\2\0\3\15\1\13"+
+    "\13\15\1\13\12\15\6\0\1\15\3\0\13\15\5\0"+
+    "\1\13\4\0\2\15\3\0\1\13\14\0\1\15\4\0"+
+    "\1\13\4\0\1\15\1\13\2\0\1\13\1\0\2\15"+
+    "\3\0\11\15\1\0\2\15\1\0\1\15\1\0\3\15"+
+    "\1\13\4\15\1\13\2\15\2\0\2\15\1\0\1\15"+
+    "\1\0\1\13\1\15\5\0\2\15\4\0\1\13\1\15"+
+    "\3\13\21\15\1\13\6\15\1\0\7\15\1\13\5\15"+
+    "\2\13\4\0\12\15\4\0\4\15\1\13\6\15\1\13"+
+    "\2\0\12\15\4\0\4\15\5\0\6\15\7\0\5\15"+
+    "\1\13\6\15\1\13\2\15\1\0\1\13\2\0\4\15"+
+    "\1\13\1\15\1\13\3\15\5\0\1\13\1\0\5\15"+
+    "\1\13\3\15\1\13\2\15\1\13\5\15\3\0\3\15"+
+    "\1\13\20\15\1\13\16\15\1\13\6\0\3\15\1\13"+
+    "\2\15\1\13\3\15\11\0\1\15\3\0\3\15\1\0"+
+    "\2\15\1\13\16\0\1\13\3\0\1\13\1\0\5\15"+
+    "\2\0\1\14\2\15\1\14\1\15\3\14\1\15\1\14"+
+    "\5\15\1\14\2\15\1\3\1\13\1\3\1\13\2\3"+
+    "\2\15\1\3\26\15\3\3\5\15\3\3\4\15\2\3"+
+    "\7\15\2\13\4\15\1\3\6\15\1\13\2\15\1\13"+
+    "\4\15\5\3\1\13\2\3\3\15\1\13\13\15\1\13"+
+    "\12\15\6\3\1\15\3\3\13\15\5\3\1\13\4\3"+
+    "\2\15\3\3\1\13\14\3\1\15\4\3\1\13\4\3"+
+    "\1\15\1\13\2\3\1\13\1\3\2\15\3\3\11\15"+
+    "\1\3\2\15\1\3\1\15\1\3\3\15\1\13\4\15"+
+    "\1\13\2\15\2\3\2\15\1\3\1\15\1\3\1\13"+
+    "\1\15\5\3\2\15\4\3\1\13\1\15\3\13\21\15"+
+    "\1\13\6\15\1\3\7\15\1\13\5\15\2\13\4\3"+
+    "\12\15\4\3\4\15\1\13\6\15\1\13\2\3\12\15"+
+    "\4\3\4\15\5\3\6\15\7\3\5\15\1\13\6\15"+
+    "\1\13\2\15\1\3\1\13\2\3\4\15\1\13\1\15"+
+    "\1\13\3\15\5\3\1\13\1\3\5\15\1\13\3\15"+
+    "\1\13\2\15\1\13\5\15\3\3\3\15\1\13\20\15"+
+    "\1\13\16\15\1\13\6\3\3\15\1\13\2\15\1\13"+
+    "\3\15\11\3\1\15\3\3\3\15\1\3\2\15\1\13"+
+    "\15\3\1\13\3\3\1\13\1\3\5\15\3\3\1\0"+
+    "\1\3\17\0\3\16\3\5\1\14\u01d9\0\1\14\u01da\3"+
+    "\16\0\1\12\2\0\2\12\1\0\10\12\1\0\2\12"+
     "\1\0\4\12\1\0\1\12\1\0\2\12\2\0\6\12"+
     "\2\0\1\12\1\0\1\12\1\0\11\12\3\0\3\12"+
     "\1\0\3\12\2\0\2\12\1\0\3\12\7\0\1\12"+
@@ -263,186 +387,128 @@ public final class UAX29URLEmailTokenizerImpl {
     "\2\0\1\12\6\0\1\12\1\0\2\12\1\0\5\12"+
     "\3\0\2\12\2\0\5\12\1\0\14\12\1\0\1\12"+
     "\2\0\3\12\5\0\2\12\1\0\2\12\2\0\2\12"+
-    "\26\0\3\12\3\0\1\12\2\0\2\4\10\0\1\12"+
-    "\373\2\1\0\3\2\374\0\2\4\43\0\2\13\2\14"+
-    "\2\13\1\14\1\13\1\14\1\13\7\14\2\13\1\14"+
-    "\1\13\3\15\1\14\1\2\1\12\1\2\1\12\2\2"+
-    "\2\14\1\2\26\14\3\2\5\14\3\2\4\14\2\2"+
-    "\7\14\2\12\4\14\1\2\6\14\1\12\2\14\1\12"+
-    "\4\14\5\2\1\12\2\2\3\14\1\12\13\14\1\12"+
-    "\12\14\6\2\1\14\3\2\13\14\5\2\1\12\4\2"+
-    "\2\14\3\2\1\12\14\2\1\14\4\2\1\12\4\2"+
-    "\1\14\1\12\2\2\1\12\1\2\2\14\3\2\11\14"+
-    "\1\2\2\14\1\2\1\14\1\2\3\14\1\12\4\14"+
-    "\1\12\2\14\2\2\2\14\1\2\1\14\1\2\1\12"+
-    "\1\14\5\2\2\14\4\2\1\12\1\14\3\12\21\14"+
-    "\1\12\6\14\1\2\7\14\1\12\5\14\2\12\4\2"+
-    "\12\14\4\2\4\14\1\12\6\14\1\12\2\2\12\14"+
-    "\4\2\4\14\5\2\6\14\7\2\5\14\1\12\6\14"+
-    "\1\12\2\14\1\2\1\12\2\2\4\14\1\12\1\14"+
-    "\1\12\3\14\5\2\1\12\1\2\5\14\1\12\3\14"+
-    "\1\12\2\14\1\12\5\14\3\2\3\14\1\12\20\14"+
-    "\1\12\16\14\1\12\6\2\3\14\1\12\2\14\1\12"+
-    "\3\14\11\2\1\14\3\2\3\14\1\2\2\14\1\12"+
-    "\2\2\1\0\13\2\1\12\3\2\1\12\1\2\5\14"+
-    "\3\2\1\0\1\2\2\0\1\13\5\14\1\13\1\14"+
-    "\1\0\1\12\1\0\1\12\2\0\2\14\1\0\26\14"+
-    "\3\0\5\14\3\0\4\14\2\0\7\14\2\12\4\14"+
-    "\1\0\6\14\1\12\2\14\1\12\4\14\5\0\1\12"+
-    "\2\0\3\14\1\12\13\14\1\12\12\14\6\0\1\14"+
-    "\3\0\13\14\5\0\1\12\4\0\2\14\3\0\1\12"+
-    "\14\0\1\14\4\0\1\12\4\0\1\14\1\12\2\0"+
-    "\1\12\1\0\2\14\3\0\11\14\1\0\2\14\1\0"+
-    "\1\14\1\0\3\14\1\12\4\14\1\12\2\14\2\0"+
-    "\2\14\1\0\1\14\1\0\1\12\1\14\5\0\2\14"+
-    "\4\0\1\12\1\14\3\12\21\14\1\12\6\14\1\0"+
-    "\7\14\1\12\5\14\2\12\4\0\12\14\4\0\4\14"+
-    "\1\12\6\14\1\12\2\0\12\14\4\0\4\14\5\0"+
-    "\6\14\7\0\5\14\1\12\6\14\1\12\2\14\1\0"+
-    "\1\12\2\0\4\14\1\12\1\14\1\12\3\14\5\0"+
-    "\1\12\1\0\5\14\1\12\3\14\1\12\2\14\1\12"+
-    "\5\14\3\0\3\14\1\12\20\14\1\12\16\14\1\12"+
-    "\6\0\3\14\1\12\2\14\1\12\3\14\11\0\1\14"+
-    "\3\0\3\14\1\0\2\14\1\12\15\0\1\12\3\0"+
-    "\1\12\1\0\5\14\2\0\3\4\15\0\3\15\1\13"+
-    "\u01c1\2\1\0\31\2\1\13\u01d8\0\3\4\16\0\1\11"+
-    "\2\0\2\11\1\0\10\11\1\0\2\11\1\0\4\11"+
-    "\1\0\1\11\1\0\2\11\2\0\6\11\2\0\1\11"+
-    "\1\0\1\11\1\0\11\11\3\0\3\11\1\0\3\11"+
-    "\2\0\2\11\1\0\3\11\7\0\1\11\10\0\1\11"+
-    "\4\0\1\11\1\0\2\11\1\0\2\11\2\0\1\11"+
-    "\1\0\1\11\1\0\3\11\3\0\1\11\1\0\1\11"+
-    "\1\0\1\11\1\0\1\11\2\0\1\11\2\0\2\11"+
-    "\1\0\3\11\1\0\5\11\1\0\1\11\1\0\1\11"+
-    "\3\0\4\11\1\0\6\11\4\0\1\11\2\0\1\11"+
-    "\6\0\1\11\1\0\2\11\1\0\5\11\3\0\2\11"+
-    "\2\0\5\11\1\0\14\11\1\0\1\11\2\0\3\11"+
-    "\5\0\2\11\1\0\2\11\2\0\2\11\26\0\3\11"+
-    "\3\0\1\11\1\0\1\13\1\14\1\13\32\14\2\15"+
-    "\1\0\2\15\1\0\2\15\1\0\1\15\1\2\2\14"+
-    "\24\2\1\12\14\2\1\12\11\2\2\14\2\12\10\2"+
-    "\3\14\1\12\1\14\7\2\1\12\1\2\2\14\21\2"+
-    "\1\12\24\2\1\12\1\14\5\2\2\14\12\2\1\12"+
-    "\4\2\1\14\3\2\1\14\1\2\1\14\15\2\1\12"+
-    "\2\14\5\2\1\14\6\2\1\14\13\2\2\14\1\12"+
-    "\2\2\4\14\1\2\1\12\20\2\4\14\5\2\1\12"+
-    "\3\2\1\12\1\2\2\14\4\2\1\12\1\2\1\14"+
-    "\5\2\1\12\4\2\1\14\3\2\1\14\1\12\11\2"+
-    "\1\12\4\2\1\12\7\2\1\14\3\2\2\12\1\14"+
-    "\2\2\1\14\12\2\1\14\5\2\2\14\1\2\3\14"+
-    "\1\2\1\12\1\14\4\2\1\12\2\2\1\12\5\2"+
-    "\1\14\3\2\1\12\1\2\1\12\20\2\1\12\6\2"+
-    "\1\14\1\12\1\2\1\12\1\2\1\14\21\2\1\0"+
-    "\4\2\1\12\4\2\1\14\7\2\1\0\1\2\2\0"+
-    "\32\14\1\0\2\14\24\0\1\12\14\0\1\12\11\0"+
-    "\2\14\2\12\10\0\3\14\1\12\1\14\7\0\1\12"+
-    "\1\0\2\14\21\0\1\12\24\0\1\12\1\14\5\0"+
-    "\2\14\12\0\1\12\4\0\1\14\3\0\1\14\1\0"+
-    "\1\14\15\0\1\12\2\14\5\0\1\14\6\0\1\14"+
-    "\13\0\2\14\1\12\2\0\4\14\1\0\1\12\20\0"+
-    "\4\14\5\0\1\12\3\0\1\12\1\0\2\14\4\0"+
-    "\1\12\1\0\1\14\5\0\1\12\4\0\1\14\3\0"+
-    "\1\14\1\12\11\0\1\12\4\0\1\12\7\0\1\14"+
-    "\3\0\2\12\1\14\2\0\1\14\12\0\1\14\5\0"+
-    "\2\14\1\0\3\14\1\0\1\12\1\14\4\0\1\12"+
-    "\2\0\1\12\5\0\1\14\3\0\1\12\1\0\1\12"+
-    "\20\0\1\12\6\0\1\14\1\12\1\0\1\12\1\0"+
-    "\1\14\25\0\1\12\4\0\1\14\10\0\2\4\20\0"+
-    "\u0155\2\1\0\20\2\u0165\0\2\4\15\0\1\11\4\0"+
-    "\1\11\1\0\1\11\60\0\2\11\10\0\1\11\2\0"+
-    "\1\11\11\0\1\11\5\0\1\11\13\0\1\11\42\0"+
-    "\1\11\11\0\1\11\21\0\1\11\5\0\1\11\2\0"+
-    "\1\11\26\0\1\11\4\0\1\11\7\0\1\11\14\0"+
-    "\1\11\1\0\3\11\20\0\1\11\15\0\1\11\5\0"+
-    "\2\11\24\0\1\11\4\0\1\11\52\0\1\11\6\0"+
-    "\1\11\3\0\1\11\5\0\1\11\1\0\1\11\10\0"+
-    "\1\11\6\0\1\11\3\0\1\11\2\0\1\11\13\0"+
-    "\1\11\16\0\1\11\16\0\1\11\11\0\1\11\2\0"+
-    "\1\11\24\0\1\11\16\0\1\11\3\0\1\11\7\0"+
-    "\1\15\3\0\22\2\1\14\7\2\1\14\6\2\1\14"+
-    "\2\2\1\12\3\2\1\14\14\2\1\14\1\12\10\2"+
-    "\1\14\7\2\1\12\17\2\1\12\4\2\1\14\22\2"+
-    "\1\14\17\2\1\14\1\2\1\14\6\2\1\14\1\2"+
-    "\2\14\6\2\1\14\16\2\2\14\16\2\1\14\5\2"+
-    "\1\14\4\2\1\14\17\2\1\14\10\2\41\0\1\12"+
-    "\1\14\7\2\33\0\1\14\7\0\1\14\6\0\1\14"+
-    "\2\0\1\12\3\0\1\14\14\0\1\14\1\12\10\0"+
-    "\1\14\7\0\1\12\17\0\1\12\4\0\1\14\22\0"+
-    "\1\14\17\0\1\14\1\0\1\14\6\0\1\14\1\0"+
-    "\2\14\6\0\1\14\16\0\2\14\16\0\1\14\5\0"+
-    "\1\14\4\0\1\14\17\0\1\14\10\0\1\12\1\14"+
-    "\7\0\3\12\20\0\312\2\41\0\11\2\323\0\3\4"+
-    "\42\0\1\11\11\0\1\11\12\0\2\11\12\0\1\11"+
-    "\10\0\1\11\22\0\1\11\23\0\1\11\22\0\1\11"+
-    "\23\0\1\11\31\0\1\11\7\0\1\11\26\0\1\11"+
-    "\3\0\1\11\6\0\1\11\7\0\1\11\11\0\1\11"+
-    "\11\0\1\11\4\0\1\11\13\0\2\11\27\0\1\11"+
-    "\5\0\1\11\2\0\1\11\10\0\1\11\1\0\1\11"+
-    "\20\0\1\11\5\0\1\11\1\0\1\11\24\0\1\11"+
-    "\13\0\1\15\24\2\1\14\10\2\1\14\4\2\1\12"+
-    "\5\2\1\14\1\2\1\12\3\2\1\14\22\2\1\12"+
-    "\17\2\1\12\2\2\1\12\34\2\142\0\4\2\1\0"+
-    "\1\15\6\0\1\2\3\0\1\15\6\0\1\15\25\0"+
-    "\1\14\10\0\1\14\4\0\1\12\5\0\1\14\1\0"+
-    "\1\12\3\0\1\14\22\0\1\12\17\0\1\12\2\0"+
-    "\1\12\37\0\1\13\1\14\1\13\1\14\2\13\4\14"+
-    "\14\0\151\2\142\0\4\2\154\0\1\13\2\4\56\0"+
-    "\1\11\20\0\1\11\20\0\1\11\16\0\1\11\227\0"+
-    "\1\11\10\0\1\15\23\2\2\14\1\2\1\14\11\2"+
-    "\1\12\2\2\2\14\1\12\4\2\1\14\3\2\1\12"+
-    "\11\2\142\0\2\2\5\0\1\15\10\0\2\2\42\0"+
-    "\2\14\1\0\1\14\11\0\1\12\2\0\2\14\1\12"+
-    "\4\0\1\14\3\0\1\12\13\0\1\14\14\0\62\2"+
-    "\142\0\2\2\137\0\1\11\7\0\1\11\24\0\1\11"+
-    "\17\0\1\11\2\0\1\11\176\0\1\15\20\2\1\14"+
-    "\5\2\160\0\2\2\1\15\54\0\1\14\23\0\24\2"+
-    "\157\0\2\2\76\0\1\11\3\0\1\11\7\0\1\11"+
-    "\155\0\6\2\1\12\10\2\63\0\1\12\42\0\1\2"+
-    "\1\15\7\0\1\15\2\0\1\16\37\0\1\12\24\0"+
-    "\16\2\126\0\1\2\237\0\12\2\41\0\1\14\22\0"+
-    "\1\2\1\15\130\0\12\2\63\0\1\2\31\0\1\11"+
-    "\73\0\1\11\43\0\5\2\5\0\1\12\14\0\1\12"+
-    "\26\0\1\15\16\0\1\16\2\0\2\16\1\0\10\16"+
-    "\1\0\2\16\1\0\4\16\1\0\1\16\1\0\2\16"+
-    "\2\0\6\16\2\0\1\16\1\0\1\16\1\0\11\16"+
-    "\3\0\3\16\1\0\3\16\2\0\2\16\1\0\3\16"+
-    "\7\0\1\16\10\0\1\16\4\0\1\16\1\0\2\16"+
-    "\1\0\2\16\2\0\1\16\1\0\1\16\1\0\3\16"+
-    "\3\0\1\16\1\0\1\16\1\0\1\16\1\0\1\16"+
-    "\2\0\1\16\2\0\2\16\1\0\3\16\1\0\5\16"+
-    "\1\0\1\16\1\0\1\16\3\0\4\16\1\0\6\16"+
-    "\4\0\1\16\2\0\1\16\6\0\1\16\1\0\2\16"+
-    "\1\0\5\16\3\0\2\16\2\0\5\16\1\0\14\16"+
-    "\1\0\1\16\2\0\3\16\5\0\2\16\1\0\2\16"+
-    "\2\0\2\16\26\0\3\16\3\0\1\16\54\0\4\2"+
-    "\164\0\1\2\14\0\1\14\20\0\1\15\15\0\1\16"+
-    "\4\0\1\16\1\0\1\16\60\0\2\16\10\0\1\16"+
-    "\2\0\1\16\11\0\1\16\5\0\1\16\13\0\1\16"+
-    "\42\0\1\16\11\0\1\16\21\0\1\16\5\0\1\16"+
-    "\2\0\1\16\26\0\1\16\4\0\1\16\7\0\1\16"+
-    "\14\0\1\16\1\0\3\16\20\0\1\16\15\0\1\16"+
-    "\5\0\2\16\24\0\1\16\4\0\1\16\52\0\1\16"+
-    "\6\0\1\16\3\0\1\16\5\0\1\16\1\0\1\16"+
-    "\10\0\1\16\6\0\1\16\3\0\1\16\2\0\1\16"+
-    "\13\0\1\16\16\0\1\16\16\0\1\16\11\0\1\16"+
-    "\2\0\1\16\24\0\1\16\16\0\1\16\3\0\1\16"+
-    "\57\0\1\2\61\0\1\11\14\0\1\11\26\0\1\2"+
-    "\62\0\1\16\11\0\1\16\12\0\2\16\12\0\1\16"+
-    "\10\0\1\16\22\0\1\16\23\0\1\16\22\0\1\16"+
-    "\23\0\1\16\31\0\1\16\7\0\1\16\26\0\1\16"+
-    "\3\0\1\16\6\0\1\16\7\0\1\16\11\0\1\16"+
-    "\11\0\1\16\4\0\1\16\13\0\2\16\27\0\1\16"+
-    "\5\0\1\16\2\0\1\16\10\0\1\16\1\0\1\16"+
-    "\20\0\1\16\5\0\1\16\1\0\1\16\24\0\1\16"+
-    "\13\0\1\15\42\0\1\2\67\0\1\2\70\0\1\16"+
-    "\20\0\1\16\20\0\1\16\16\0\1\16\227\0\1\16"+
-    "\45\0\1\2\41\0\1\2\61\0\1\16\7\0\1\16"+
-    "\24\0\1\16\17\0\1\16\2\0\1\16\325\0\1\16"+
-    "\3\0\1\16\7\0\1\16\u0144\0\1\16\73\0\1\16"+
-    "\273\0\1\16\14\0\1\16\224\0";
+    "\26\0\3\12\3\0\1\12\1\0\1\5\1\0\1\5"+
+    "\1\16\1\0\1\16\2\14\32\15\2\16\1\0\2\16"+
+    "\1\0\1\16\1\0\2\15\24\0\1\13\14\0\1\13"+
+    "\11\0\2\15\2\13\10\0\3\15\1\13\1\15\7\0"+
+    "\1\13\1\0\2\15\21\0\1\13\24\0\1\13\1\15"+
+    "\5\0\2\15\12\0\1\13\4\0\1\15\3\0\1\15"+
+    "\1\0\1\15\15\0\1\13\2\15\5\0\1\15\6\0"+
+    "\1\15\13\0\2\15\1\13\2\0\4\15\1\0\1\13"+
+    "\20\0\4\15\5\0\1\13\3\0\1\13\1\0\2\15"+
+    "\4\0\1\13\1\0\1\15\5\0\1\13\4\0\1\15"+
+    "\3\0\1\15\1\13\11\0\1\13\4\0\1\13\7\0"+
+    "\1\15\3\0\2\13\1\15\2\0\1\15\12\0\1\15"+
+    "\5\0\2\15\1\0\3\15\1\0\1\13\1\15\4\0"+
+    "\1\13\2\0\1\13\5\0\1\15\3\0\1\13\1\0"+
+    "\1\13\20\0\1\13\6\0\1\15\1\13\1\0\1\13"+
+    "\1\0\1\15\26\0\1\13\4\0\1\15\7\0\33\15"+
+    "\1\3\2\15\24\3\1\13\14\3\1\13\11\3\2\15"+
+    "\2\13\10\3\3\15\1\13\1\15\7\3\1\13\1\3"+
+    "\2\15\21\3\1\13\24\3\1\13\1\15\5\3\2\15"+
+    "\12\3\1\13\4\3\1\15\3\3\1\15\1\3\1\15"+
+    "\15\3\1\13\2\15\5\3\1\15\6\3\1\15\13\3"+
+    "\2\15\1\13\2\3\4\15\1\3\1\13\20\3\4\15"+
+    "\5\3\1\13\3\3\1\13\1\3\2\15\4\3\1\13"+
+    "\1\3\1\15\5\3\1\13\4\3\1\15\3\3\1\15"+
+    "\1\13\11\3\1\13\4\3\1\13\7\3\1\15\3\3"+
+    "\2\13\1\15\2\3\1\15\12\3\1\15\5\3\2\15"+
+    "\1\3\3\15\1\3\1\13\1\15\4\3\1\13\2\3"+
+    "\1\13\5\3\1\15\3\3\1\13\1\3\1\13\20\3"+
+    "\1\13\6\3\1\15\1\13\1\3\1\13\1\3\1\15"+
+    "\25\3\1\13\4\3\1\15\7\3\1\0\1\3\22\0"+
+    "\1\5\1\0\1\5\u0165\0\u0165\3\15\0\1\12\4\0"+
+    "\1\12\1\0\1\12\60\0\2\12\10\0\1\12\2\0"+
+    "\1\12\11\0\1\12\5\0\1\12\13\0\1\12\42\0"+
+    "\1\12\11\0\1\12\21\0\1\12\5\0\1\12\2\0"+
+    "\1\12\26\0\1\12\4\0\1\12\7\0\1\12\14\0"+
+    "\1\12\1\0\3\12\20\0\1\12\15\0\1\12\5\0"+
+    "\2\12\24\0\1\12\4\0\1\12\52\0\1\12\6\0"+
+    "\1\12\3\0\1\12\5\0\1\12\1\0\1\12\10\0"+
+    "\1\12\6\0\1\12\3\0\1\12\2\0\1\12\13\0"+
+    "\1\12\16\0\1\12\16\0\1\12\11\0\1\12\2\0"+
+    "\1\12\24\0\1\12\16\0\1\12\3\0\1\12\7\0"+
+    "\3\13\1\0\1\16\24\0\1\15\7\0\1\15\6\0"+
+    "\1\15\2\0\1\13\3\0\1\15\14\0\1\15\1\13"+
+    "\10\0\1\15\7\0\1\13\17\0\1\13\4\0\1\15"+
+    "\22\0\1\15\17\0\1\15\1\0\1\15\6\0\1\15"+
+    "\1\0\2\15\6\0\1\15\16\0\2\15\16\0\1\15"+
+    "\5\0\1\15\4\0\1\15\17\0\1\15\51\0\1\13"+
+    "\1\15\7\0\22\3\1\15\7\3\1\15\6\3\1\15"+
+    "\2\3\1\13\3\3\1\15\14\3\1\15\1\13\10\3"+
+    "\1\15\7\3\1\13\17\3\1\13\4\3\1\15\22\3"+
+    "\1\15\17\3\1\15\1\3\1\15\6\3\1\15\1\3"+
+    "\2\15\6\3\1\15\16\3\2\15\16\3\1\15\5\3"+
+    "\1\15\4\3\1\15\17\3\1\15\10\3\1\13\1\15"+
+    "\7\3\31\0\3\5\364\0\323\3\42\0\1\12\11\0"+
+    "\1\12\12\0\2\12\12\0\1\12\10\0\1\12\22\0"+
+    "\1\12\23\0\1\12\22\0\1\12\23\0\1\12\31\0"+
+    "\1\12\7\0\1\12\26\0\1\12\3\0\1\12\6\0"+
+    "\1\12\7\0\1\12\11\0\1\12\11\0\1\12\4\0"+
+    "\1\12\13\0\2\12\27\0\1\12\5\0\1\12\2\0"+
+    "\1\12\10\0\1\12\1\0\1\12\20\0\1\12\5\0"+
+    "\1\12\1\0\1\12\24\0\1\12\13\0\1\14\2\15"+
+    "\4\14\4\15\1\16\24\0\1\15\10\0\1\15\4\0"+
+    "\1\13\5\0\1\15\1\0\1\13\3\0\1\15\22\0"+
+    "\1\13\17\0\1\13\2\0\1\13\201\0\24\3\1\15"+
+    "\10\3\1\15\4\3\1\13\5\3\1\15\1\3\1\13"+
+    "\3\3\1\15\22\3\1\13\17\3\1\13\2\3\1\13"+
+    "\40\3\1\16\7\0\1\3\3\0\1\16\6\0\1\16"+
+    "\15\0\1\5\1\14\1\5\316\0\155\3\56\0\1\12"+
+    "\20\0\1\12\20\0\1\12\16\0\1\12\227\0\1\12"+
+    "\10\0\1\15\1\16\23\0\2\15\1\0\1\15\11\0"+
+    "\1\13\2\0\2\15\1\13\4\0\1\15\3\0\1\13"+
+    "\155\0\23\3\2\15\1\3\1\15\11\3\1\13\2\3"+
+    "\2\15\1\13\4\3\1\15\3\3\1\13\13\3\5\0"+
+    "\1\16\6\0\1\3\2\0\1\3\261\0\64\3\53\0"+
+    "\1\12\7\0\1\12\24\0\1\12\17\0\1\12\2\0"+
+    "\1\12\176\0\1\16\20\0\1\15\167\0\20\3\1\15"+
+    "\7\3\1\16\255\0\26\3\50\0\1\12\3\0\1\12"+
+    "\7\0\1\12\163\0\1\13\76\0\1\13\40\0\6\3"+
+    "\1\13\11\3\1\16\7\0\1\16\2\0\1\17\211\0"+
+    "\17\3\275\0\1\15\21\0\13\3\1\16\213\0\13\3"+
+    "\16\0\1\12\76\0\1\12\54\0\1\13\14\0\1\13"+
+    "\24\0\5\3\1\16\16\0\1\17\2\0\2\17\1\0"+
+    "\10\17\1\0\2\17\1\0\4\17\1\0\1\17\1\0"+
+    "\2\17\2\0\6\17\2\0\1\17\1\0\1\17\1\0"+
+    "\11\17\3\0\3\17\1\0\3\17\2\0\2\17\1\0"+
+    "\3\17\7\0\1\17\10\0\1\17\4\0\1\17\1\0"+
+    "\2\17\1\0\2\17\2\0\1\17\1\0\1\17\1\0"+
+    "\3\17\3\0\1\17\1\0\1\17\1\0\1\17\1\0"+
+    "\1\17\2\0\1\17\2\0\2\17\1\0\3\17\1\0"+
+    "\5\17\1\0\1\17\1\0\1\17\3\0\4\17\1\0"+
+    "\6\17\4\0\1\17\2\0\1\17\6\0\1\17\1\0"+
+    "\2\17\1\0\5\17\3\0\2\17\2\0\5\17\1\0"+
+    "\14\17\1\0\1\17\2\0\3\17\5\0\2\17\1\0"+
+    "\2\17\2\0\2\17\26\0\3\17\3\0\1\17\124\0"+
+    "\4\3\125\0\1\15\17\0\1\3\1\16\15\0\1\17"+
+    "\4\0\1\17\1\0\1\17\60\0\2\17\10\0\1\17"+
+    "\2\0\1\17\11\0\1\17\5\0\1\17\13\0\1\17"+
+    "\42\0\1\17\11\0\1\17\21\0\1\17\5\0\1\17"+
+    "\2\0\1\17\26\0\1\17\4\0\1\17\7\0\1\17"+
+    "\14\0\1\17\1\0\3\17\20\0\1\17\15\0\1\17"+
+    "\5\0\2\17\24\0\1\17\4\0\1\17\52\0\1\17"+
+    "\6\0\1\17\3\0\1\17\5\0\1\17\1\0\1\17"+
+    "\10\0\1\17\6\0\1\17\3\0\1\17\2\0\1\17"+
+    "\13\0\1\17\16\0\1\17\16\0\1\17\11\0\1\17"+
+    "\2\0\1\17\24\0\1\17\16\0\1\17\3\0\1\17"+
+    "\112\0\1\3\27\0\1\12\14\0\1\12\46\0\1\3"+
+    "\41\0\1\17\11\0\1\17\12\0\2\17\12\0\1\17"+
+    "\10\0\1\17\22\0\1\17\23\0\1\17\22\0\1\17"+
+    "\23\0\1\17\31\0\1\17\7\0\1\17\26\0\1\17"+
+    "\3\0\1\17\6\0\1\17\7\0\1\17\11\0\1\17"+
+    "\11\0\1\17\4\0\1\17\13\0\2\17\27\0\1\17"+
+    "\5\0\1\17\2\0\1\17\10\0\1\17\1\0\1\17"+
+    "\20\0\1\17\5\0\1\17\1\0\1\17\24\0\1\17"+
+    "\13\0\1\16\63\0\1\3\60\0\1\3\56\0\1\17"+
+    "\20\0\1\17\20\0\1\17\16\0\1\17\227\0\1\17"+
+    "\57\0\1\3\35\0\1\3\53\0\1\17\7\0\1\17"+
+    "\24\0\1\17\17\0\1\17\2\0\1\17\324\0\1\17"+
+    "\3\0\1\17\7\0\1\17\u0144\0\1\17\76\0\1\17"+
+    "\272\0\1\17\14\0\1\17\222\0";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[12851];
+    int [] result = new int[12892];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -467,1616 +533,1621 @@ public final class UAX29URLEmailTokenizerImpl {
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\124\0\250\0\374\0\u0150\0\u01a4\0\u01f8\0\u024c"+
-    "\0\u02a0\0\u02f4\0\u0348\0\u039c\0\u03f0\0\u0444\0\u0498\0\u04ec"+
-    "\0\u0540\0\u0594\0\u05e8\0\u063c\0\u0690\0\u06e4\0\u0738\0\u078c"+
-    "\0\u07e0\0\u0834\0\u0888\0\u08dc\0\u0930\0\u0984\0\u09d8\0\u0a2c"+
-    "\0\u0a80\0\u0ad4\0\u0b28\0\u0b7c\0\u0bd0\0\u024c\0\u0c24\0\u0348"+
-    "\0\u0c78\0\u0ccc\0\u039c\0\u0d20\0\u0d74\0\u0dc8\0\u0e1c\0\u0444"+
-    "\0\u0e70\0\u0ec4\0\u0f18\0\u0f6c\0\u0fc0\0\u1014\0\u1068\0\u10bc"+
-    "\0\u1110\0\u1164\0\u11b8\0\u120c\0\u1260\0\u12b4\0\u1308\0\u135c"+
-    "\0\u13b0\0\u0738\0\u1404\0\u1458\0\u14ac\0\u1500\0\u1554\0\u15a8"+
-    "\0\u15fc\0\u1650\0\u16a4\0\u16f8\0\u174c\0\u17a0\0\u17f4\0\u1848"+
-    "\0\u189c\0\u18f0\0\u1944\0\u1998\0\u19ec\0\u1a40\0\u1a94\0\u1ae8"+
-    "\0\u1b3c\0\u1b90\0\u1be4\0\u1c38\0\u1c8c\0\u1ce0\0\u1d34\0\u1d88"+
-    "\0\u1ddc\0\u1e30\0\u1e84\0\u1ed8\0\u1f2c\0\u1f80\0\u1fd4\0\u2028"+
-    "\0\u207c\0\u20d0\0\u2124\0\u2178\0\u21cc\0\u2220\0\u2274\0\u22c8"+
-    "\0\u231c\0\u2370\0\u23c4\0\u2418\0\u246c\0\u24c0\0\u2514\0\u2568"+
-    "\0\u25bc\0\u2610\0\u2664\0\u26b8\0\u270c\0\u2760\0\u27b4\0\u2808"+
-    "\0\u285c\0\u28b0\0\u2904\0\u2958\0\u29ac\0\u2a00\0\u2a54\0\u2aa8"+
-    "\0\u2afc\0\u2b50\0\u2ba4\0\u2bf8\0\u2c4c\0\u2ca0\0\u2cf4\0\u2d48"+
-    "\0\u2d9c\0\u2df0\0\u2e44\0\u2e98\0\u2eec\0\u2f40\0\u2f94\0\u2fe8"+
-    "\0\u303c\0\u3090\0\u30e4\0\u3138\0\u318c\0\u31e0\0\u3234\0\u3288"+
-    "\0\u32dc\0\u3330\0\u3384\0\u33d8\0\u342c\0\u3480\0\u34d4\0\u3528"+
-    "\0\u357c\0\u35d0\0\u3624\0\u3678\0\u36cc\0\u3720\0\u3774\0\u37c8"+
-    "\0\u381c\0\u3870\0\u38c4\0\u3918\0\u396c\0\u39c0\0\u3a14\0\u3a68"+
-    "\0\u3abc\0\u3b10\0\u3b64\0\u3bb8\0\u3c0c\0\u3c60\0\u3cb4\0\u3d08"+
-    "\0\u3d5c\0\u3db0\0\u3e04\0\u3e58\0\u3eac\0\u3f00\0\u3f54\0\u3fa8"+
-    "\0\u3ffc\0\u4050\0\u40a4\0\u40f8\0\u414c\0\u41a0\0\u41f4\0\u4248"+
-    "\0\u429c\0\u42f0\0\u4344\0\u4398\0\u43ec\0\u4440\0\u4494\0\u44e8"+
-    "\0\u453c\0\u4590\0\u45e4\0\u4638\0\250\0\u468c\0\u46e0\0\u4734"+
-    "\0\u4788\0\u47dc\0\u4830\0\u4884\0\u48d8\0\u492c\0\u4980\0\u49d4"+
-    "\0\u4a28\0\u4a7c\0\u4ad0\0\u4b24\0\u4b78\0\u4bcc\0\u4c20\0\u4c74"+
-    "\0\u4cc8\0\u4d1c\0\u4d70\0\u4dc4\0\u4e18\0\u4e6c\0\u4ec0\0\u4f14"+
-    "\0\u4f68\0\u4fbc\0\u5010\0\u5064\0\u50b8\0\u510c\0\u5160\0\u51b4"+
-    "\0\u5208\0\u525c\0\u52b0\0\u5304\0\u5358\0\u53ac\0\u5400\0\u5454"+
-    "\0\u54a8\0\u54fc\0\u5550\0\u55a4\0\u55f8\0\u564c\0\u56a0\0\u56f4"+
-    "\0\u5748\0\u579c\0\u57f0\0\u5844\0\u5898\0\u58ec\0\u5940\0\u5994"+
-    "\0\u59e8\0\u5a3c\0\u5a90\0\u5ae4\0\u5b38\0\u5b8c\0\u5be0\0\u5c34"+
-    "\0\u5c88\0\u5cdc\0\u5d30\0\u5d84\0\u5dd8\0\u5e2c\0\u5e80\0\u5ed4"+
-    "\0\u5f28\0\u5f7c\0\u5fd0\0\u6024\0\u6078\0\u60cc\0\u6120\0\u6174"+
-    "\0\u61c8\0\u621c\0\u6270\0\u62c4\0\u6318\0\u636c\0\u63c0\0\u6414"+
-    "\0\u6468\0\u64bc\0\u6510\0\u6564\0\u65b8\0\u660c\0\u6660\0\u66b4"+
-    "\0\u6708\0\u675c\0\u67b0\0\u6804\0\u6858\0\u68ac\0\u6900\0\u6954"+
-    "\0\u69a8\0\u69fc\0\u6a50\0\u6aa4\0\u6af8\0\u6b4c\0\u6ba0\0\u6bf4"+
-    "\0\u6c48\0\u6c9c\0\u6cf0\0\u6d44\0\u6d98\0\u6dec\0\u6e40\0\u6e94"+
-    "\0\u6ee8\0\u6f3c\0\u6f90\0\u6fe4\0\u7038\0\u708c\0\u70e0\0\u7134"+
-    "\0\u7188\0\u71dc\0\u7230\0\u7284\0\u72d8\0\u732c\0\u7380\0\u73d4"+
-    "\0\u7428\0\u747c\0\u74d0\0\u7524\0\u7578\0\u75cc\0\u7620\0\u7674"+
-    "\0\u76c8\0\u771c\0\u7770\0\u77c4\0\u7818\0\u786c\0\u78c0\0\u7914"+
-    "\0\u7968\0\u79bc\0\u7a10\0\u7a64\0\u7ab8\0\u7b0c\0\u7b60\0\u7bb4"+
-    "\0\u7c08\0\u7c5c\0\u7cb0\0\u7d04\0\u7d58\0\u7dac\0\u7e00\0\u7e54"+
-    "\0\u7ea8\0\u7efc\0\u7f50\0\u7fa4\0\u7ff8\0\u804c\0\u80a0\0\u80f4"+
-    "\0\u8148\0\u819c\0\u81f0\0\u8244\0\u8298\0\u82ec\0\u8340\0\u8394"+
-    "\0\u83e8\0\u843c\0\u8490\0\u84e4\0\u8538\0\u858c\0\u85e0\0\u8634"+
-    "\0\u8688\0\u86dc\0\u8730\0\u8784\0\u87d8\0\u882c\0\u8880\0\u88d4"+
-    "\0\u8928\0\u897c\0\u89d0\0\u8a24\0\u8a78\0\u8acc\0\u8b20\0\u8b74"+
-    "\0\u8bc8\0\u8c1c\0\u8c70\0\u8cc4\0\u8d18\0\u8d6c\0\u8dc0\0\u8e14"+
-    "\0\u8e68\0\u8ebc\0\u8f10\0\u8f64\0\u8fb8\0\u900c\0\u9060\0\u90b4"+
-    "\0\u9108\0\u915c\0\u91b0\0\u9204\0\u9258\0\u92ac\0\u9300\0\u9354"+
-    "\0\u93a8\0\u93fc\0\u9450\0\u94a4\0\u94f8\0\u954c\0\u95a0\0\u95f4"+
-    "\0\u9648\0\u969c\0\u96f0\0\u9744\0\u9798\0\u97ec\0\u9840\0\u9894"+
-    "\0\u98e8\0\u993c\0\u9990\0\u99e4\0\u9a38\0\u9a8c\0\u9ae0\0\u9b34"+
-    "\0\u9b88\0\u9bdc\0\u9c30\0\u9c84\0\u9cd8\0\u9d2c\0\u9d80\0\u9dd4"+
-    "\0\u9e28\0\u9e7c\0\u9ed0\0\u9f24\0\u9f78\0\u9fcc\0\ua020\0\ua074"+
-    "\0\ua0c8\0\ua11c\0\ua170\0\ua1c4\0\ua218\0\ua26c\0\ua2c0\0\ua314"+
-    "\0\ua368\0\ua3bc\0\ua410\0\ua464\0\ua4b8\0\ua50c\0\ua560\0\ua5b4"+
-    "\0\ua608\0\ua65c\0\ua6b0\0\ua704\0\ua758\0\ua7ac\0\ua800\0\ua854"+
-    "\0\ua8a8\0\ua8fc\0\ua950\0\ua9a4\0\ua9f8\0\uaa4c\0\uaaa0\0\uaaf4"+
-    "\0\uab48\0\uab9c\0\uabf0\0\uac44\0\uac98\0\uacec\0\uad40\0\uad94"+
-    "\0\uade8\0\uae3c\0\uae90\0\uaee4\0\uaf38\0\uaf8c\0\uafe0\0\ub034"+
-    "\0\ub088\0\ub0dc\0\ub130\0\ub184\0\ub1d8\0\ub22c\0\ub280\0\ub2d4"+
-    "\0\ub328\0\ub37c\0\ub3d0\0\ub424\0\ub478\0\ub4cc\0\ub520\0\ub574"+
-    "\0\ub5c8\0\ub61c\0\ub670\0\ub6c4\0\ub718\0\ub76c\0\ub7c0\0\ub814"+
-    "\0\ub868\0\ub8bc\0\ub910\0\ub964\0\ub9b8\0\uba0c\0\uba60\0\ubab4"+
-    "\0\ubb08\0\ubb5c\0\ubbb0\0\ubc04\0\ubc58\0\ubcac\0\ubd00\0\ubd54"+
-    "\0\ubda8\0\ubdfc\0\ube50\0\ubea4\0\ubef8\0\ubf4c\0\ubfa0\0\ubff4"+
-    "\0\uc048\0\uc09c\0\uc0f0\0\uc144\0\uc198\0\uc1ec\0\uc240\0\uc294"+
-    "\0\uc2e8\0\uc33c\0\uc390\0\uc3e4\0\uc438\0\uc48c\0\uc4e0\0\uc534"+
-    "\0\uc588\0\uc5dc\0\uc630\0\uc684\0\uc6d8\0\uc72c\0\uc780\0\uc7d4"+
-    "\0\uc828\0\uc87c\0\uc8d0\0\uc924\0\uc978\0\uc9cc\0\uca20\0\uca74"+
-    "\0\ucac8\0\ucb1c\0\ucb70\0\ucbc4\0\ucc18\0\ucc6c\0\uccc0\0\ucd14"+
-    "\0\ucd68\0\ucdbc\0\uce10\0\uce64\0\uceb8\0\ucf0c\0\ucf60\0\ucfb4"+
-    "\0\ud008\0\ud05c\0\ud0b0\0\ud104\0\ud158\0\ud1ac\0\ud200\0\ud254"+
-    "\0\ud2a8\0\ud2fc\0\ud350\0\ud3a4\0\ud3f8\0\ud44c\0\ud4a0\0\ud4f4"+
-    "\0\ud548\0\ud59c\0\ud5f0\0\ud644\0\ud698\0\ud6ec\0\ud740\0\ud794"+
-    "\0\ud7e8\0\ud83c\0\ud890\0\ud8e4\0\ud938\0\ud98c\0\ud9e0\0\uda34"+
-    "\0\uda88\0\udadc\0\udb30\0\udb84\0\udbd8\0\udc2c\0\udc80\0\udcd4"+
-    "\0\udd28\0\udd7c\0\uddd0\0\ude24\0\ude78\0\udecc\0\udf20\0\udf74"+
-    "\0\udfc8\0\ue01c\0\ue070\0\ue0c4\0\ue118\0\ue16c\0\ue1c0\0\ue214"+
-    "\0\ue268\0\ue2bc\0\ue310\0\ue364\0\ue3b8\0\ue40c\0\ue460\0\ue4b4"+
-    "\0\ue508\0\ue55c\0\ue5b0\0\ue604\0\ue658\0\ue6ac\0\ue700\0\ue754"+
-    "\0\ue7a8\0\ue7fc\0\ue850\0\ue8a4\0\ue8f8\0\ue94c\0\ue9a0\0\ue9f4"+
-    "\0\uea48\0\uea9c\0\ueaf0\0\ueb44\0\ueb98\0\uebec\0\uec40\0\uec94"+
-    "\0\uece8\0\ued3c\0\ued90\0\uede4\0\uee38\0\uee8c\0\ueee0\0\uef34"+
-    "\0\uef88\0\uefdc\0\uf030\0\uf084\0\uf0d8\0\uf12c\0\uf180\0\uf1d4"+
-    "\0\uf228\0\uf27c\0\uf2d0\0\uf324\0\uf378\0\uf3cc\0\uf420\0\uf474"+
-    "\0\uf4c8\0\uf51c\0\uf570\0\uf5c4\0\uf618\0\uf66c\0\uf6c0\0\uf714"+
-    "\0\uf768\0\uf7bc\0\uf810\0\uf864\0\uf8b8\0\uf90c\0\uf960\0\uf9b4"+
-    "\0\ufa08\0\ufa5c\0\ufab0\0\ufb04\0\ufb58\0\ufbac\0\ufc00\0\ufc54"+
-    "\0\ufca8\0\ufcfc\0\ufd50\0\ufda4\0\ufdf8\0\ufe4c\0\ufea0\0\ufef4"+
-    "\0\uff48\0\uff9c\0\ufff0\1\104\1\230\1\354\1\u0140\1\u0194"+
-    "\1\u01e8\1\u023c\1\u0290\1\u02e4\1\u0338\1\u038c\1\u03e0\1\u0434"+
-    "\1\u0488\1\u04dc\1\u0530\1\u0584\1\u05d8\1\u062c\1\u0680\1\u06d4"+
-    "\1\u0728\1\u077c\1\u07d0\1\u0824\1\u0878\1\u08cc\1\u0920\1\u0974"+
-    "\1\u09c8\1\u0a1c\1\u0a70\1\u0ac4\1\u0b18\1\u0b6c\1\u0bc0\1\u0c14"+
-    "\1\u0c68\1\u0cbc\1\u0d10\1\u0d64\1\u0db8\1\u0e0c\1\u0e60\1\u0eb4"+
-    "\1\u0f08\1\u0f5c\1\u0fb0\1\u1004\1\u1058\1\u10ac\1\u1100\1\u1154"+
-    "\1\u11a8\1\u11fc\1\u1250\1\u12a4\1\u12f8\1\u134c\1\u13a0\1\u13f4"+
-    "\1\u1448\1\u149c\1\u14f0\1\u1544\1\u1598\1\u15ec\1\u1640\1\u1694"+
-    "\1\u16e8\1\u173c\1\u1790\1\u17e4\1\u1838\1\u188c\1\u18e0\1\u1934"+
-    "\1\u1988\1\u19dc\1\u1a30\1\u1a84\1\u1ad8\1\u1b2c\1\u1b80\1\u1bd4"+
-    "\1\u1c28\1\u1c7c\1\u1cd0\1\u1d24\1\u1d78\1\u1dcc\1\u1e20\1\u1e74"+
-    "\1\u1ec8\1\u1f1c\1\u1f70\1\u1fc4\1\u2018\1\u206c\1\u20c0\1\u2114"+
-    "\1\u2168\1\u21bc\1\u2210\1\u2264\1\u22b8\1\u230c\1\u2360\1\u23b4"+
-    "\1\u2408\1\u245c\1\u24b0\1\u2504\1\u2558\1\u25ac\1\u2600\1\u2654"+
-    "\1\u26a8\1\u26fc\1\u2750\1\u27a4\1\u27f8\1\u284c\1\u28a0\1\u28f4"+
-    "\1\u2948\1\u299c\1\u29f0\1\u2a44\1\u2a98\1\u2aec\1\u2b40\1\u2b94"+
-    "\1\u2be8\1\u2c3c\1\u2c90\1\u2ce4\1\u2d38\1\u2d8c\1\u2de0\1\u2e34"+
-    "\1\u2e88\1\u2edc\1\u2f30\1\u2f84\1\u2fd8\1\u302c\1\u3080\1\u30d4"+
-    "\1\u3128\1\u317c\1\u31d0\1\u3224\1\u3278\1\u32cc\1\u3320\1\u3374"+
-    "\1\u33c8\1\u341c\1\u3470\1\u34c4\1\u3518\1\u356c\1\u35c0\1\u3614"+
-    "\1\u3668\1\u36bc\1\u3710\1\u3764\1\u37b8\1\u380c\1\u3860\1\u38b4"+
-    "\1\u3908\1\u395c\1\u39b0\1\u3a04\1\u3a58\1\u3aac\1\u3b00\1\u3b54"+
-    "\1\u3ba8\1\u3bfc\1\u3c50\1\u3ca4\1\u3cf8\1\u3d4c\1\u3da0\1\u3df4"+
-    "\1\u3e48\1\u3e9c\1\u3ef0\1\u3f44\1\u3f98\1\u3fec\1\u4040\1\u4094"+
-    "\1\u40e8\1\u413c\1\u4190\1\u41e4\1\u4238\1\u428c\1\u42e0\1\u4334"+
-    "\1\u4388\1\u43dc\1\u4430\1\u4484\1\u44d8\1\u452c\1\u4580\1\u45d4"+
-    "\1\u4628\1\u467c\1\u46d0\1\u4724\1\u4778\1\u47cc\1\u4820\1\u4874"+
-    "\1\u48c8\1\u491c\1\u4970\1\u49c4\1\u4a18\1\u4a6c\1\u4ac0\1\u4b14"+
-    "\1\u4b68\1\u4bbc\1\u4c10\1\u4c64\1\u4cb8\1\u4d0c\1\u4d60\1\u4db4"+
-    "\1\u4e08\1\u4e5c\1\u4eb0\1\u4f04\1\u4f58\1\u4fac\1\u5000\1\u5054"+
-    "\1\u50a8\1\u50fc\1\u5150\1\u51a4\1\u51f8\1\u524c\1\u52a0\1\u52f4"+
-    "\1\u5348\1\u539c\1\u53f0\1\u5444\1\u5498\1\u54ec\1\u5540\1\u5594"+
-    "\1\u55e8\1\u563c\1\u5690\1\u56e4\1\u5738\1\u578c\1\u57e0\1\u5834"+
-    "\1\u5888\1\u58dc\1\u5930\1\u5984\1\u59d8\1\u5a2c\1\u5a80\1\u5ad4"+
-    "\1\u5b28\1\u5b7c\1\u5bd0\1\u5c24\1\u5c78\1\u5ccc\1\u5d20\1\u5d74"+
-    "\1\u5dc8\1\u5e1c\1\u5e70\1\u5ec4\1\u5f18\1\u5f6c\1\u5fc0\1\u6014"+
-    "\1\u6068\1\u60bc\1\u6110\1\u6164\1\u61b8\1\u620c\1\u6260\1\u62b4"+
-    "\1\u6308\1\u635c\1\u63b0\1\u6404\1\u6458\1\u64ac\1\u6500\1\u6554"+
-    "\1\u65a8\1\u65fc\1\u6650\1\u66a4\1\u66f8\1\u674c\1\u67a0\1\u67f4"+
-    "\1\u6848\1\u689c\1\u68f0\1\u6944\1\u6998\1\u69ec\1\u6a40\1\u6a94"+
-    "\1\u6ae8\1\u6b3c\1\u6b90\1\u6be4\1\u6c38\1\u6c8c\1\u6ce0\1\u6d34"+
-    "\1\u6d88\1\u6ddc\1\u6e30\1\u6e84\1\u6ed8\1\u6f2c\1\u6f80\1\u6fd4"+
-    "\1\u7028\1\u707c\1\u70d0\1\u7124\1\u7178\1\u71cc\1\u7220\1\u7274"+
-    "\1\u72c8\1\u731c\1\u7370\1\u73c4\1\u7418\1\u746c\1\u74c0\1\u7514"+
-    "\1\u7568\1\u75bc\1\u7610\1\u7664\1\u76b8\1\u770c\1\u7760\1\u77b4"+
-    "\1\u7808\1\u785c\1\u78b0\1\u7904\1\u7958\1\u79ac\1\u7a00\1\u7a54"+
-    "\1\u7aa8\1\u7afc\1\u7b50\1\u7ba4\1\u7bf8\1\u7c4c\1\u7ca0\1\u7cf4"+
-    "\1\u7d48\1\u7d9c\1\u7df0\1\u7e44\1\u7e98\1\u7eec\1\u7f40\1\u7f94"+
-    "\1\u7fe8\1\u803c\1\u8090\1\u80e4\1\u8138\1\u818c\1\u81e0\1\u8234"+
-    "\1\u8288\1\u82dc\1\u8330\1\u8384\1\u83d8\1\u842c\1\u8480\1\u84d4"+
-    "\1\u8528\1\u857c\1\u85d0\1\u8624\1\u8678\1\u86cc\1\u8720\1\u8774"+
-    "\1\u87c8\1\u881c\1\u8870\1\u88c4\1\u8918\1\u896c\1\u89c0\1\u8a14"+
-    "\1\u8a68\1\u8abc\1\u8b10\1\u8b64\1\u8bb8\1\u8c0c\1\u8c60\1\u8cb4"+
-    "\1\u8d08\1\u8d5c\1\u8db0\1\u8e04\1\u8e58\1\u8eac\1\u8f00\1\u8f54"+
-    "\1\u8fa8\1\u8ffc\1\u9050\1\u90a4\1\u90f8\1\u914c\1\u91a0\1\u91f4"+
-    "\1\u9248\1\u929c\1\u92f0\1\u9344\1\u9398\1\u93ec\1\u9440\1\u9494"+
-    "\1\u94e8\1\u953c\1\u9590\1\u95e4\1\u9638\1\u968c\1\u96e0\1\u9734"+
-    "\1\u9788\1\u97dc\1\u9830\1\u9884\1\u98d8\1\u992c\1\u9980\1\u99d4"+
-    "\1\u9a28\1\u9a7c\1\u9ad0\1\u9b24\1\u9b78\1\u9bcc\1\u9c20\1\u9c74"+
-    "\1\u9cc8\1\u9d1c\1\u9d70\1\u9dc4\1\u9e18\1\u9e6c\1\u9ec0\1\u9f14"+
-    "\1\u9f68\1\u9fbc\1\ua010\1\ua064\1\ua0b8\1\ua10c\1\ua160\1\ua1b4"+
-    "\1\ua208\1\ua25c\1\ua2b0\1\ua304\1\ua358\1\ua3ac\0\250\0\374"+
-    "\0\374\0\u0a80\0\u0a80\0\u0ad4\0\u0b28\0\u0ec4\0\u03f0\1\ua400"+
-    "\1\ua454\0\u04ec\0\u7d58\1\ua4a8\1\ua4fc\0\u0f6c\0\u0fc0\1\ua550"+
-    "\0\u0348\0\u1014\0\u0ccc\1\ua5a4\1\ua5f8\1\ua64c\0\250\1\ua6a0"+
-    "\1\ua6f4\1\ua748\1\ua79c\1\ua7f0\1\ua844\1\ua898\1\ua8ec\1\ua940"+
-    "\1\ua994\0\u5dd8\1\ua9e8\1\uaa3c\1\uaa90\1\uaae4\1\uab38\1\uab8c"+
-    "\1\uabe0\1\uac34\1\uac88\1\uacdc\0\u69fc\1\uad30\1\uad84\1\uadd8"+
-    "\1\uae2c\1\uae80\1\uaed4\1\uaf28\1\uaf7c\1\uafd0\1\ub024\1\ub078"+
-    "\1\ub0cc\1\ub120\1\ub174\1\ub1c8\1\ub21c\1\ub270\1\ub2c4\1\ub318"+
-    "\1\ub36c\0\u6708\1\ub3c0\1\ub414\1\ub468\1\ub4bc\1\ub510\1\ub564"+
-    "\1\ub5b8\1\ub60c\1\ub660\1\ub6b4\1\ub708\1\ub75c\1\ub7b0\1\ub804"+
-    "\1\ub858\1\ub8ac\0\u8880\1\ub900\1\ub954\0\u7bb4\1\ub9a8\0\u95f4"+
-    "\1\ub9fc\1\uba50\1\ubaa4\1\ubaf8\1\ubb4c\1\ubba0\1\ubbf4\1\ubc48"+
-    "\1\ubc9c\1\ubcf0\1\ubd44\1\ubd98\1\ubdec\1\ube40\1\ube94\1\ubee8"+
-    "\1\ubf3c\1\ubf90\1\ubfe4\1\uc038\1\uc08c\1\uc0e0\1\uc134\1\uc188"+
-    "\1\uc1dc\1\uc230\1\uc284\1\uc2d8\1\uc32c\1\uc380\1\uc3d4\1\uc428"+
-    "\1\uc47c\1\uc4d0\1\uc524\1\uc578\1\uc5cc\1\ub2c4\1\uc620\1\uc674"+
-    "\1\uc6c8\1\uc71c\1\uc770\1\uc7c4\1\uc818\1\uc86c\1\uc8c0\1\uc914"+
-    "\1\uc968\1\uc9bc\1\uca10\1\uca64\1\ucab8\1\ucb0c\1\ucb60\1\ucbb4"+
-    "\1\ucc08\1\ucc5c\1\uccb0\1\ucd04\1\ucd58\1\ucdac\1\uce00\1\uce54"+
-    "\1\ucea8\0\u80f4\1\ucefc\1\ucf50\1\ucfa4\1\ucff8\1\ud04c\1\ud0a0"+
-    "\1\ud0f4\1\ud148\1\ud19c\1\ud1f0\1\ud244\1\ud298\1\ud2ec\1\ud340"+
-    "\1\ud394\1\ud3e8\1\ud43c\1\ud490\1\ud4e4\1\ud538\1\ud58c\1\ud5e0"+
-    "\1\ud634\1\ud688\1\ud6dc\1\ud730\1\ud784\1\ud7d8\1\ud82c\1\ud880"+
-    "\1\ud8d4\1\ud928\1\ud97c\1\ud9d0\1\uda24\1\uda78\1\udacc\1\udb20"+
-    "\1\udb74\1\udbc8\1\udc1c\1\udc70\1\udcc4\1\udd18\1\udd6c\1\uddc0"+
-    "\1\ude14\1\ude68\1\udebc\1\udf10\1\udf64\1\udfb8\1\ue00c\1\ue060"+
-    "\1\ub318\1\ue0b4\1\ue108\1\ue15c\1\ue1b0\1\ue204\1\ue258\1\ue2ac"+
-    "\1\ue300\1\ue354\1\ue3a8\0\u5454\1\ue3fc\1\ue450\1\ue4a4\1\ue4f8"+
-    "\1\ue54c\1\ue5a0\1\ue5f4\1\ue648\1\ue69c\1\ue6f0\1\ue744\1\ue798"+
-    "\0\u8bc8\1\ucc08\1\ue7ec\1\ue840\1\ue894\1\ue8e8\1\ue93c\1\ue990"+
-    "\1\ue9e4\1\uea38\1\uea8c\1\ueae0\1\ueb34\1\ueb88\1\uebdc\1\uec30"+
-    "\1\uec84\1\uecd8\1\ued2c\1\ued80\1\uedd4\1\uee28\1\uee7c\1\ueed0"+
-    "\1\uef24\1\uef78\1\uefcc\1\uf020\1\uf074\1\uf0c8\1\uf11c\1\uf170"+
-    "\1\uf1c4\1\uf218\1\uf26c\1\uf2c0\1\uf314\1\ud19c\1\uf368\1\uf3bc"+
-    "\1\uf410\1\uf464\1\uf4b8\1\uf50c\1\uf560\1\ue354\1\uf5b4\1\uf608"+
-    "\1\uf65c\1\uf6b0\1\uf704\1\uf758\1\uf7ac\1\uf800\1\uf854\1\uf8a8"+
-    "\1\uf8fc\1\uf950\1\uf9a4\1\uf9f8\1\ufa4c\1\ufaa0\1\ufaf4\1\ufb48"+
-    "\1\ufb9c\1\ufbf0\1\ufc44\1\ufc98\1\ufcec\1\ufd40\1\ufd94\0\u7968"+
-    "\1\ufde8\1\ufe3c\1\ufe90\1\ue93c\1\ufee4\1\uff38\1\uff8c\1\uffe0"+
-    "\2\64\2\210\2\334\2\u0130\2\u0184\2\u01d8\2\u022c\2\u0280"+
-    "\2\u02d4\0\u6120\0\u9204\2\u0328\2\u037c\2\u03d0\2\u0424\2\u0478"+
-    "\2\u04cc\2\u0520\2\u0574\2\u05c8\2\u061c\2\u0670\2\u06c4\0\u621c"+
-    "\2\u0718\2\u076c\2\u07c0\2\u0814\2\u0868\2\u08bc\2\u0910\2\u0964"+
-    "\2\u09b8\2\u0a0c\2\u0a60\2\u0ab4\2\u0b08\2\u0b5c\2\u0bb0\2\u0c04"+
-    "\2\u0c58\2\u0cac\2\u0d00\2\u0d54\2\u0da8\2\u0dfc\2\u0e50\2\u0ea4"+
-    "\2\u0ef8\2\u0f4c\2\u0fa0\2\u0ff4\2\u1048\2\u109c\2\u10f0\2\u1144"+
-    "\2\u1198\2\u11ec\2\u1240\2\u1294\2\u12e8\2\u133c\2\u1390\2\u13e4"+
-    "\2\u1438\2\u148c\2\u14e0\2\u1534\2\u1588\2\u15dc\2\u1630\0\u81f0"+
-    "\2\u1684\2\u16d8\2\u172c\2\u1780\2\u17d4\2\u1828\2\u187c\2\u18d0"+
-    "\2\u1924\2\u1978\2\u19cc\2\u1a20\2\u1a74\2\u1ac8\2\u1b1c\2\u1b70"+
-    "\2\u1bc4\2\u1c18\2\u1c6c\2\u1cc0\2\u1d14\2\u1d68\2\u1dbc\2\u1e10"+
-    "\2\u1e64\2\u1eb8\2\u1f0c\2\u1f60\2\u1fb4\2\u2008\2\u205c\2\u20b0"+
-    "\2\u2104\2\u2158\2\u21ac\2\u2200\2\u2254\2\u22a8\2\u22fc\2\u2350"+
-    "\2\u23a4\2\u23f8\2\u244c\2\u24a0\2\u24f4\2\u2548\2\u259c\2\u25f0"+
-    "\0\u8244\2\u2644\2\u2698\2\u26ec\2\u2740\2\u2794\1\ubdec\2\u27e8"+
-    "\2\u283c\2\u2890\2\u28e4\2\u2938\2\u298c\2\u29e0\2\u2a34\2\u2a88"+
-    "\2\u2adc\2\u2b30\2\u2b84\2\u2bd8\2\u2c2c\2\u2c80\2\u2cd4\2\u2d28"+
-    "\2\u2d7c\2\u2dd0\2\u2e24\2\u2e78\2\u2ecc\2\u2f20\2\u2f74\2\u2fc8"+
-    "\2\u301c\2\u3070\2\u30c4\2\u3118\2\u316c\2\u31c0\2\u3214\2\u3268"+
-    "\2\u32bc\2\u1bc4\2\u3310\2\u3364\2\u33b8\2\u340c\2\u3460\2\u34b4"+
-    "\2\u09b8\2\u3508\2\u355c\2\u35b0\2\u3604\2\u3658\2\u36ac\2\u3700"+
-    "\2\u3754\2\u37a8\2\u37fc\2\u3850\2\u38a4\2\u38f8\2\u394c\2\u39a0"+
-    "\2\u39f4\2\u3a48\2\u3a9c\2\u3af0\2\u3b44\2\u3b98\2\u3bec\2\u3c40"+
-    "\2\u3c94\2\u3ce8\2\u3d3c\2\u3d90\2\u3de4\2\u3e38\2\u3e8c\0\u747c"+
-    "\2\u3ee0\2\u3f34\2\u3f88\2\u3fdc\2\u4030\2\u4084\2\u40d8\2\u412c"+
-    "\2\u4180\2\u41d4\2\u4228\2\u427c\2\u42d0\0\u2274\0\ud1ac\2\u4324"+
-    "\2\u4378\2\u43cc\0\u0348\2\u4420\2\u4474\2\u44c8\2\u451c\2\u4570"+
-    "\2\u45c4\2\u4618\2\u466c\2\u46c0\2\u4714\0\ub22c\2\u4768\2\u47bc"+
-    "\2\u4810\2\u4864\2\u48b8\2\u490c\2\u4960\2\u49b4\2\u4a08\2\u4a5c"+
-    "\0\ube50\2\u4ab0\2\u4b04\2\u4b58\2\u4bac\2\u4c00\2\u4c54\2\u4ca8"+
-    "\2\u4cfc\2\u4d50\2\u4da4\2\u4df8\2\u4e4c\2\u4ea0\2\u4ef4\2\u4f48"+
-    "\2\u4f9c\2\u4ff0\2\u5044\2\u5098\2\u50ec\0\ubb5c\2\u5140\2\u5194"+
-    "\2\u51e8\2\u523c\2\u5290\2\u52e4\2\u5338\2\u538c\2\u53e0\2\u5434"+
-    "\2\u5488\2\u54dc\2\u5530\2\u5584\2\u55d8\2\u562c\0\udcd4\2\u5680"+
-    "\2\u56d4\0\ud008\2\u5728\0\uea48\2\u577c\2\u57d0\2\u5824\2\u5878"+
-    "\2\u58cc\2\u5920\2\u5974\2\u59c8\2\u5a1c\2\u5a70\2\u5ac4\2\u5b18"+
-    "\2\u5b6c\2\u5bc0\2\u5c14\2\u5c68\2\u5cbc\2\u5d10\2\u5d64\2\u5db8"+
-    "\2\u5e0c\2\u5e60\2\u5eb4\2\u5f08\2\u5f5c\2\u5fb0\2\u6004\2\u6058"+
-    "\2\u60ac\2\u6100\2\u6154\2\u61a8\2\u61fc\2\u6250\2\u62a4\2\u62f8"+
-    "\2\u634c\2\u5044\2\u63a0\2\u63f4\2\u6448\2\u649c\2\u64f0\2\u6544"+
-    "\2\u6598\2\u65ec\2\u6640\2\u6694\2\u66e8\2\u673c\2\u6790\2\u67e4"+
-    "\2\u6838\2\u688c\2\u68e0\2\u6934\2\u6988\2\u69dc\2\u6a30\2\u6a84"+
-    "\2\u6ad8\2\u6b2c\2\u6b80\2\u6bd4\2\u6c28\0\ud548\2\u6c7c\2\u6cd0"+
-    "\2\u6d24\2\u6d78\2\u6dcc\2\u6e20\2\u6e74\2\u6ec8\2\u6f1c\2\u6f70"+
-    "\2\u6fc4\2\u7018\2\u706c\2\u70c0\2\u7114\2\u7168\2\u71bc\2\u7210"+
-    "\2\u7264\2\u72b8\2\u730c\2\u7360\2\u73b4\2\u7408\2\u745c\2\u74b0"+
-    "\2\u7504\2\u7558\2\u75ac\2\u7600\2\u7654\2\u76a8\2\u76fc\2\u7750"+
-    "\2\u77a4\2\u77f8\2\u784c\2\u78a0\2\u78f4\2\u7948\2\u799c\2\u79f0"+
-    "\2\u7a44\2\u7a98\2\u7aec\2\u7b40\2\u7b94\2\u7be8\2\u7c3c\2\u7c90"+
-    "\2\u7ce4\2\u7d38\2\u7d8c\2\u7de0\2\u5098\2\u7e34\2\u7e88\2\u7edc"+
-    "\2\u7f30\2\u7f84\2\u7fd8\2\u802c\2\u8080\2\u80d4\2\u8128\0\ua8a8"+
-    "\2\u817c\2\u81d0\2\u8224\2\u8278\2\u82cc\2\u8320\2\u8374\2\u83c8"+
-    "\2\u841c\2\u8470\2\u84c4\2\u8518\0\ue01c\2\u6988\2\u856c\2\u85c0"+
-    "\2\u8614\2\u8668\2\u86bc\2\u8710\2\u8764\2\u87b8\2\u880c\2\u8860"+
-    "\2\u88b4\2\u8908\2\u895c\2\u89b0\2\u8a04\2\u8a58\2\u8aac\2\u8b00"+
-    "\2\u8b54\2\u8ba8\2\u8bfc\2\u8c50\2\u8ca4\2\u8cf8\2\u8d4c\2\u8da0"+
-    "\2\u8df4\2\u8e48\2\u8e9c\2\u8ef0\2\u8f44\2\u8f98\2\u8fec\2\u9040"+
-    "\2\u9094\2\u6f1c\2\u90e8\2\u913c\2\u9190\2\u91e4\2\u9238\2\u928c"+
-    "\2\u92e0\2\u80d4\2\u9334\2\u9388\2\u93dc\2\u9430\2\u9484\2\u94d8"+
-    "\2\u952c\2\u9580\2\u95d4\2\u9628\2\u967c\2\u96d0\2\u9724\2\u9778"+
-    "\2\u97cc\2\u9820\2\u9874\2\u98c8\2\u991c\2\u9970\2\u99c4\2\u9a18"+
-    "\2\u9a6c\2\u9ac0\2\u9b14\0\ucdbc\2\u9b68\2\u9bbc\2\u9c10\2\u86bc"+
-    "\2\u9c64\2\u9cb8\2\u9d0c\2\u9d60\2\u9db4\2\u9e08\2\u9e5c\2\u9eb0"+
-    "\2\u9f04\2\u9f58\2\u9fac\2\ua000\2\ua054\0\ub574\0\ue658\2\ua0a8"+
-    "\2\ua0fc\2\ua150\2\ua1a4\2\ua1f8\2\ua24c\2\ua2a0\2\ua2f4\2\ua348"+
-    "\2\ua39c\2\ua3f0\2\ua444\0\ub670\2\ua498\2\ua4ec\2\ua540\2\ua594"+
-    "\2\ua5e8\2\ua63c\2\ua690\2\ua6e4\2\ua738\2\ua78c\2\ua7e0\2\ua834"+
-    "\2\ua888\2\ua8dc\2\ua930\2\ua984\2\ua9d8\2\uaa2c\2\uaa80\2\uaad4"+
-    "\2\uab28\2\uab7c\2\uabd0\2\uac24\2\uac78\2\uaccc\2\uad20\2\uad74"+
-    "\2\uadc8\2\uae1c\2\uae70\2\uaec4\2\uaf18\2\uaf6c\2\uafc0\2\ub014"+
-    "\2\ub068\2\ub0bc\2\ub110\2\ub164\2\ub1b8\2\ub20c\2\ub260\2\ub2b4"+
-    "\2\ub308\2\ub35c\2\ub3b0\0\ud644\2\ub404\2\ub458\2\ub4ac\2\ub500"+
-    "\2\ub554\2\ub5a8\2\ub5fc\2\ub650\2\ub6a4\2\ub6f8\2\ub74c\2\ub7a0"+
-    "\2\ub7f4\2\ub848\2\ub89c\2\ub8f0\2\ub944\2\ub998\2\ub9ec\2\uba40"+
-    "\2\uba94\2\ubae8\2\ubb3c\2\ubb90\2\ubbe4\2\ubc38\2\ubc8c\2\ubce0"+
-    "\2\ubd34\2\ubd88\2\ubddc\2\ube30\2\ube84\2\ubed8\2\ubf2c\2\ubf80"+
-    "\2\ubfd4\2\uc028\2\uc07c\2\uc0d0\2\uc124\2\uc178\2\uc1cc\2\uc220"+
-    "\2\uc274\2\uc2c8\2\uc31c\2\uc370\0\ud698\2\uc3c4\2\uc418\2\uc46c"+
-    "\2\uc4c0\2\uc514\2\u5b6c\2\uc568\2\uc5bc\2\uc610\2\uc664\2\uc6b8"+
-    "\2\uc70c\2\uc760\2\uc7b4\2\uc808\2\uc85c\2\uc8b0\2\uc904\2\uc958"+
-    "\2\uc9ac\2\uca00\2\uca54\2\ucaa8\2\ucafc\2\ucb50\2\ucba4\2\ucbf8"+
-    "\2\ucc4c\2\ucca0\2\uccf4\2\ucd48\2\ucd9c\2\ucdf0\2\uce44\2\uce98"+
-    "\2\uceec\2\ucf40\2\ucf94\2\ucfe8\2\ud03c\2\ub944\2\ud090\2\ud0e4"+
-    "\2\ud138\2\ud18c\2\ud1e0\2\ud234\2\ua738\2\ud288\2\ud2dc\2\ud330"+
-    "\2\ud384\2\ud3d8\2\ud42c\2\ud480\2\ud4d4\2\ud528\2\ud57c\2\ud5d0"+
-    "\2\ud624\2\ud678\2\ud6cc\2\ud720\2\ud774\2\ud7c8\2\ud81c\2\ud870"+
-    "\2\ud8c4\2\ud918\2\ud96c\2\ud9c0\2\uda14\2\uda68\2\udabc\2\udb10"+
-    "\2\udb64\2\udbb8\0\uc8d0\2\udc0c\2\udc60\2\udcb4\2\udd08\2\udd5c"+
-    "\2\uddb0\2\ude04\2\ude58\2\udeac\2\udf00\2\udf54\2\udfa8\2\udffc"+
-    "\2\ue050\2\ue0a4\2\ue0f8\2\ue14c\2\ue1a0\2\ue1f4\2\ue248\2\ue29c"+
-    "\2\ue2f0\2\ue344\2\ue398\2\ue3ec\2\ue440\2\ue494\2\ue4e8\2\ue53c"+
-    "\2\ue590\2\ue5e4\2\ue638\2\ue68c\2\ue6e0\2\ue734\2\ue788\2\ue7dc"+
-    "\2\ue830\2\ue884\2\ue8d8\2\ue92c\2\ue980\2\ue9d4\2\uea28\2\uea7c"+
-    "\2\uead0\2\ueb24\2\ueb78\2\uebcc\2\uec20\2\uec74\2\uecc8\2\ued1c"+
-    "\2\ued70\2\uedc4\2\uee18\2\uee6c\2\ueec0\2\uef14\2\uef68\2\uefbc"+
-    "\2\uf010\2\uf064\2\uf0b8\2\uf10c\2\uf160\2\uf1b4\2\uf208\2\uf25c"+
-    "\2\uf2b0\2\uf304\2\uf358\2\uf3ac\2\uf400\2\uf454\2\uf4a8\2\uf4fc"+
-    "\2\uf550\2\uf5a4\2\uf5f8\2\uf64c\2\uf6a0\2\uf6f4\2\uf748\2\uf79c"+
-    "\2\uf7f0\2\uf844\2\uf898\2\uf8ec\2\uf940\2\uf994\2\uf9e8\2\ufa3c"+
-    "\2\ufa90\2\ufae4\2\ufb38\2\ufb8c\2\ufbe0\2\ufc34\2\ufc88\2\ufcdc"+
-    "\2\ufd30\2\ufd84\2\ufdd8\2\ufe2c\2\ufe80\2\ufed4\2\uff28\2\uff7c"+
-    "\2\uffd0\3\44\3\170\3\314\3\u0120\3\u0174\3\u01c8\3\u021c"+
-    "\3\u0270\3\u02c4\3\u0318\3\u036c\3\u03c0\3\u0414\3\u0468\3\u04bc"+
-    "\3\u0510\3\u0564\3\u05b8\3\u060c\3\u0660\3\u06b4\3\u0708\3\u075c"+
-    "\3\u07b0\3\u0804\3\u0858\3\u08ac\3\u0900\3\u0954\3\u09a8\3\u09fc"+
-    "\3\u0a50\3\u0aa4\3\u0af8\3\u0b4c\3\u0ba0\3\u0bf4\3\u0c48\3\u0c9c"+
-    "\3\u0cf0\3\u0d44\3\u0d98\3\u0dec\3\u0e40\3\u0e94\3\u0ee8\3\u0f3c"+
-    "\3\u0f90\3\u0fe4\3\u1038\3\u108c\3\u10e0\3\u1134\3\u1188\3\u11dc"+
-    "\3\u1230\3\u1284\3\u12d8\3\u132c\3\u1380\3\u13d4\3\u1428\3\u147c"+
-    "\3\u14d0\3\u1524\3\u1578\3\u15cc\3\u1620\3\u1674\3\u16c8\3\u171c"+
-    "\3\u1770\3\u17c4\3\u1818\3\u186c\3\u18c0\3\u1914\3\u1968\3\u19bc"+
-    "\3\u1a10\3\u1a64\3\u1ab8\3\u1b0c\3\u1b60\3\u1bb4\3\u1c08\3\u1c5c"+
-    "\3\u1cb0\3\u1d04\3\u1d58\3\u1dac\3\u1e00\3\u1e54\3\u1ea8\3\u1efc"+
-    "\3\u1f50\3\u1fa4\3\u1ff8\3\u204c\3\u20a0\3\u20f4\3\u2148\3\u219c"+
-    "\3\u21f0\3\u2244\3\u2298\3\u22ec\3\u2340\3\u2394\3\u23e8\3\u243c"+
-    "\3\u2490\3\u24e4\3\u2538\3\u258c\3\u25e0\3\u2634\3\u2688\3\u26dc"+
-    "\3\u2730\3\u2784\3\u27d8\3\u282c\3\u2880\3\u28d4\3\u2928\3\u297c"+
-    "\3\u29d0\3\u2a24\3\u2a78\3\u2acc\3\u2b20\3\u2b74\3\u2bc8\3\u2c1c"+
-    "\3\u2c70\3\u2cc4\3\u2d18\3\u2d6c\3\u2dc0\3\u2e14\3\u2e68\3\u2ebc"+
-    "\3\u2f10\3\u2f64\3\u2fb8\3\u300c\3\u3060\3\u30b4\3\u3108\3\u315c"+
-    "\3\u31b0\3\u3204\3\u3258\3\u32ac\3\u3300\3\u3354\3\u33a8\3\u33fc"+
-    "\3\u3450\3\u34a4\3\u34f8\3\u354c\3\u35a0\3\u35f4\3\u3648\3\u369c"+
-    "\3\u36f0\3\u3744\3\u3798\3\u37ec\3\u3840\3\u3894\3\u38e8\3\u393c"+
-    "\3\u3990\3\u39e4\3\u3a38\3\u3a8c\3\u3ae0\3\u3b34\3\u3b88\3\u3bdc"+
-    "\3\u3c30\3\u3c84\3\u3cd8\3\u3d2c\3\u3d80\3\u3dd4\3\u3e28\3\u3e7c"+
-    "\3\u3ed0\3\u3f24\3\u3f78\3\u3fcc\3\u4020\3\u4074\3\u40c8\3\u411c"+
-    "\3\u4170\3\u41c4\3\u4218\3\u426c\3\u42c0\3\u4314\3\u4368\3\u43bc"+
-    "\3\u4410\3\u4464\3\u44b8\3\u450c\3\u4560\3\u45b4\3\u4608\3\u465c"+
-    "\3\u46b0\3\u4704\3\u4758\3\u47ac\3\u4800\3\u4854\3\u48a8\3\u48fc"+
-    "\3\u4950\3\u49a4\3\u49f8\3\u4a4c\3\u4aa0\3\u4af4\3\u4b48\3\u4b9c"+
-    "\3\u4bf0\3\u4c44\3\u4c98\3\u4cec\3\u4d40\3\u4d94\3\u4de8\3\u4e3c"+
-    "\3\u4e90\3\u4ee4\3\u4f38\3\u4f8c\3\u4fe0\3\u5034\3\u5088\3\u50dc"+
-    "\3\u5130\3\u5184\3\u51d8\3\u522c\3\u5280\3\u52d4\3\u5328\3\u537c"+
-    "\3\u53d0\3\u5424\3\u5478\3\u54cc\3\u5520\3\u5574\3\u55c8\3\u561c"+
-    "\3\u5670\3\u56c4\3\u5718\3\u576c\3\u57c0\3\u5814\3\u5868\3\u58bc"+
-    "\3\u5910\3\u5964\3\u59b8\3\u5a0c\3\u5a60\3\u5ab4\3\u5b08\3\u5b5c"+
-    "\3\u5bb0\3\u5c04\3\u5c58\3\u5cac\3\u5d00\3\u5d54\3\u5da8\3\u5dfc"+
-    "\3\u5e50\3\u5ea4\3\u5ef8\3\u5f4c\3\u5fa0\3\u5ff4\3\u6048\3\u609c"+
-    "\3\u60f0\3\u6144\3\u6198\3\u61ec\3\u6240\3\u6294\3\u62e8\3\u633c"+
-    "\3\u6390\3\u63e4\3\u6438\3\u648c\3\u64e0\3\u6534\3\u6588\3\u65dc"+
-    "\3\u6630\3\u6684\3\u66d8\3\u672c\3\u6780\3\u67d4\3\u6828\3\u687c"+
-    "\3\u68d0\3\u6924\3\u6978\3\u69cc\3\u6a20\3\u6a74\3\u6ac8\3\u6b1c"+
-    "\3\u6b70\3\u6bc4\3\u6c18\3\u6c6c\3\u6cc0\3\u6d14\3\u6d68\3\u6dbc"+
-    "\3\u6e10\3\u6e64\3\u6eb8\3\u6f0c\3\u6f60\3\u6fb4\3\u7008\3\u705c"+
-    "\3\u70b0\3\u7104\3\u7158\3\u71ac\3\u7200\3\u7254\3\u72a8\3\u72fc"+
-    "\3\u7350\3\u73a4\3\u73f8\3\u744c\3\u74a0\3\u74f4\3\u7548\3\u759c"+
-    "\3\u75f0\3\u7644\3\u7698\3\u76ec\3\u7740\3\u7794\3\u77e8\3\u783c"+
-    "\3\u7890\3\u78e4\3\u7938\3\u798c\3\u79e0\3\u7a34\3\u7a88\3\u7adc"+
-    "\3\u7b30\3\u7b84\3\u7bd8\3\u7c2c\3\u7c80\3\u7cd4\3\u7d28\3\u7d7c"+
-    "\3\u7dd0\3\u7e24\3\u7e78\3\u7ecc\3\u7f20\3\u7f74\3\u7fc8\3\u801c"+
-    "\3\u8070\3\u80c4\3\u8118\3\u816c\3\u81c0\3\u8214\3\u8268\3\u82bc"+
-    "\3\u8310\3\u8364\3\u83b8\3\u840c\3\u8460\3\u84b4\3\u8508\3\u855c"+
-    "\3\u85b0\3\u8604\3\u8658\3\u86ac\3\u8700\3\u8754\3\u87a8\3\u87fc"+
-    "\3\u8850\3\u88a4\3\u88f8\3\u894c\3\u89a0\3\u89f4\3\u8a48\3\u8a9c"+
-    "\3\u8af0\3\u8b44\3\u8b98\3\u8bec\3\u8c40\3\u8c94\3\u8ce8\3\u8d3c"+
-    "\3\u8d90\3\u8de4\3\u8e38\3\u8e8c\3\u8ee0\3\u8f34\3\u8f88\3\u8fdc"+
-    "\3\u9030\3\u9084\3\u90d8\3\u912c\3\u9180\3\u91d4\3\u9228\3\u927c"+
-    "\3\u92d0\3\u9324\3\u9378\3\u93cc\3\u9420\3\u9474\3\u94c8\3\u951c"+
-    "\3\u9570\3\u95c4\3\u9618\3\u966c\3\u96c0\3\u9714\3\u9768\3\u97bc"+
-    "\3\u9810\3\u9864\3\u98b8\3\u990c\3\u9960\3\u99b4\3\u9a08\3\u9a5c"+
-    "\3\u9ab0\3\u9b04\3\u9b58\3\u9bac\3\u9c00\3\u9c54\3\u9ca8\3\u9cfc"+
-    "\3\u9d50\3\u9da4\3\u9df8\3\u9e4c\3\u9ea0\3\u9ef4\3\u9f48\3\u9f9c"+
-    "\3\u9ff0\3\ua044\3\ua098\3\ua0ec\3\ua140\3\ua194\3\ua1e8\3\ua23c"+
-    "\3\ua290\3\ua2e4\3\ua338\3\ua38c\3\ua3e0\3\ua434\3\ua488\3\ua4dc"+
-    "\3\ua530\3\ua584\3\ua5d8\3\ua62c\3\ua680\3\ua6d4\3\ua728\3\ua77c"+
-    "\3\ua7d0\3\ua824\3\ua878\3\ua8cc\3\ua920\3\ua974\3\ua9c8\3\uaa1c"+
-    "\3\uaa70\3\uaac4\3\uab18\3\uab6c\3\uabc0\3\uac14\3\uac68\3\uacbc"+
-    "\3\uad10\3\uad64\3\uadb8\3\uae0c\3\uae60\3\uaeb4\3\uaf08\3\uaf5c"+
-    "\3\uafb0\3\ub004\3\ub058\3\ub0ac\3\ub100\3\ub154\3\ub1a8\3\ub1fc"+
-    "\3\ub250\3\ub2a4\3\ub2f8\3\ub34c\3\ub3a0\3\ub3f4\3\ub448\3\ub49c"+
-    "\3\ub4f0\3\ub544\3\ub598\3\ub5ec\3\ub640\3\ub694\3\ub6e8\3\ub73c"+
-    "\3\ub790\3\ub7e4\3\ub838\3\ub88c\3\ub8e0\3\ub934\3\ub988\3\ub9dc"+
-    "\3\uba30\3\uba84\3\ubad8\3\ubb2c\3\ubb80\3\ubbd4\3\ubc28\3\ubc7c"+
-    "\3\ubcd0\3\ubd24\3\ubd78\3\ubdcc\3\ube20\3\ube74\3\ubec8\3\ubf1c"+
-    "\3\ubf70\3\ubfc4\3\uc018\3\uc06c\3\uc0c0\3\uc114\3\uc168\3\uc1bc"+
-    "\3\uc210\3\uc264\3\uc2b8\3\uc30c\3\uc360\3\uc3b4\3\uc408\3\uc45c"+
-    "\3\uc4b0\3\uc504\3\uc558\3\uc5ac\3\uc600\3\uc654\3\uc6a8\3\uc6fc"+
-    "\3\uc750\3\uc7a4\3\uc7f8\3\uc84c\3\uc8a0\3\uc8f4\3\uc948\3\uc99c"+
-    "\3\uc9f0\3\uca44\3\uca98\3\ucaec\3\ucb40\3\ucb94\3\ucbe8\3\ucc3c"+
-    "\3\ucc90\3\ucce4\3\ucd38\3\ucd8c\3\ucde0\3\uce34\3\uce88\3\ucedc"+
-    "\3\ucf30\3\ucf84\3\ucfd8\3\ud02c\3\ud080\3\ud0d4\3\ud128\3\ud17c"+
-    "\3\ud1d0\3\ud224\3\ud278\3\ud2cc\3\ud320\3\ud374\3\ud3c8\3\ud41c"+
-    "\3\ud470\3\ud4c4\3\ud518\3\ud56c\3\ud5c0\3\ud614\3\ud668\3\ud6bc"+
-    "\3\ud710\3\ud764\3\ud7b8\3\ud80c\3\ud860\3\ud8b4\3\ud908\3\ud95c"+
-    "\3\ud9b0\3\uda04\3\uda58\3\udaac\3\udb00\3\udb54\3\udba8\3\udbfc"+
-    "\3\udc50\3\udca4\3\udcf8\3\udd4c\3\udda0\3\uddf4\3\ude48\3\ude9c"+
-    "\3\udef0\3\udf44\3\udf98\3\udfec\3\ue040\3\ue094\3\ue0e8\3\ue13c"+
-    "\3\ue190\3\ue1e4\3\ue238\3\ue28c\3\ue2e0\3\ue334\3\ue388\3\ue3dc"+
-    "\3\ue430\3\ue484\3\ue4d8\3\ue52c\3\ue580\3\ue5d4\3\ue628\3\ue67c"+
-    "\3\ue6d0\3\ue724\3\ue778\3\ue7cc\3\ue820\3\ue874\3\ue8c8\3\ue91c"+
-    "\3\ue970\3\ue9c4\3\uea18\3\uea6c\3\ueac0\3\ueb14\3\ueb68\3\uebbc"+
-    "\3\uec10\3\uec64\3\uecb8\3\ued0c\3\ued60\3\uedb4\3\uee08\3\uee5c"+
-    "\3\ueeb0\3\uef04\3\uef58\3\uefac\3\uf000\3\uf054\3\uf0a8\3\uf0fc"+
-    "\3\uf150\3\uf1a4\3\uf1f8\3\uf24c\3\uf2a0\3\uf2f4\3\uf348\3\uf39c"+
-    "\3\uf3f0\3\uf444\3\uf498\3\uf4ec\3\uf540\3\uf594\3\uf5e8\3\uf63c"+
-    "\3\uf690\3\uf6e4\3\uf738\3\uf78c\3\uf7e0\3\uf834\3\uf888\3\uf8dc"+
-    "\3\uf930\3\uf984\3\uf9d8\3\ufa2c\3\ufa80\3\ufad4\3\ufb28\3\ufb7c"+
-    "\3\ufbd0\3\ufc24\3\ufc78\3\ufccc\3\ufd20\3\ufd74\3\ufdc8\3\ufe1c"+
-    "\3\ufe70\3\ufec4\3\uff18\3\uff6c\3\uffc0\4\24\4\150\4\274"+
-    "\4\u0110\4\u0164\4\u01b8\4\u020c\4\u0260\4\u02b4\4\u0308\4\u035c"+
-    "\4\u03b0\4\u0404\4\u0458\4\u04ac\4\u0500\4\u0554\4\u05a8\4\u05fc"+
-    "\4\u0650\4\u06a4\4\u06f8\4\u074c\4\u07a0\4\u07f4\4\u0848\4\u089c"+
-    "\4\u08f0\4\u0944\4\u0998\4\u09ec\4\u0a40\4\u0a94\4\u0ae8\4\u0b3c"+
-    "\4\u0b90\4\u0be4\4\u0c38\4\u0c8c\4\u0ce0\4\u0d34\4\u0d88\4\u0ddc"+
-    "\4\u0e30\4\u0e84\4\u0ed8\4\u0f2c\4\u0f80\4\u0fd4\4\u1028\4\u107c"+
-    "\4\u10d0\4\u1124\4\u1178\4\u11cc\4\u1220\4\u1274\4\u12c8\4\u131c"+
-    "\4\u1370\4\u13c4\4\u1418\4\u146c\4\u14c0\4\u1514\4\u1568\4\u15bc"+
-    "\4\u1610\4\u1664\4\u16b8\4\u170c\4\u1760\4\u17b4\4\u1808\4\u185c"+
-    "\4\u18b0\4\u1904\4\u1958\4\u19ac\4\u1a00\4\u1a54\4\u1aa8\4\u1afc"+
-    "\4\u1b50\4\u1ba4\4\u1bf8\4\u1c4c\4\u1ca0\4\u1cf4\4\u1d48\4\u1d9c"+
-    "\4\u1df0\4\u1e44\4\u1e98\4\u1eec\4\u1f40\4\u1f94\4\u1fe8\4\u203c"+
-    "\4\u2090\4\u20e4\4\u2138\4\u218c\4\u21e0\4\u2234\4\u2288\4\u22dc"+
-    "\4\u2330\4\u2384\4\u23d8\4\u242c\4\u2480\4\u24d4\4\u2528\4\u257c"+
-    "\4\u25d0\4\u2624\4\u2678\4\u26cc\4\u2720\4\u2774\4\u27c8\4\u281c"+
-    "\4\u2870\4\u28c4\4\u2918\4\u296c\4\u29c0\4\u2a14\4\u2a68\4\u2abc"+
-    "\4\u2b10\4\u2b64\4\u2bb8\4\u2c0c\4\u2c60\4\u2cb4\4\u2d08\4\u2d5c"+
-    "\4\u2db0\4\u2e04\4\u2e58\4\u2eac\4\u2f00\4\u2f54\4\u2fa8\4\u2ffc"+
-    "\4\u3050\4\u30a4\4\u30f8\4\u314c\4\u31a0\4\u31f4\4\u3248\4\u329c"+
-    "\4\u32f0\4\u3344\4\u3398\4\u33ec\4\u3440\4\u3494\4\u34e8\4\u353c"+
-    "\4\u3590\4\u35e4\4\u3638\4\u368c\4\u36e0\4\u3734\4\u3788\4\u37dc"+
-    "\4\u3830\4\u3884\4\u38d8\4\u392c\4\u3980\4\u39d4\4\u3a28\4\u3a7c"+
-    "\4\u3ad0\4\u3b24\4\u3b78\4\u3bcc\4\u3c20\4\u3c74\4\u3cc8\4\u3d1c"+
-    "\4\u3d70\4\u3dc4\4\u3e18\4\u3e6c\4\u3ec0\4\u3f14\4\u3f68\4\u3fbc"+
-    "\4\u4010\4\u4064\4\u40b8\4\u410c\4\u4160\4\u41b4\4\u4208\4\u425c"+
-    "\4\u42b0\4\u4304\4\u4358\4\u43ac\4\u4400\4\u4454\4\u44a8\4\u44fc"+
-    "\4\u4550\4\u45a4\4\u45f8\4\u464c\4\u46a0\4\u46f4\4\u4748\4\u479c"+
-    "\4\u47f0\4\u4844\4\u4898\4\u48ec\4\u4940\4\u4994\4\u49e8\4\u4a3c"+
-    "\4\u4a90\4\u4ae4\4\u4b38\4\u4b8c\4\u4be0\4\u4c34\4\u4c88\4\u4cdc"+
-    "\4\u4d30\4\u4d84\4\u4dd8\4\u4e2c\4\u4e80\4\u4ed4\4\u4f28\4\u4f7c"+
-    "\4\u4fd0\4\u5024\4\u5078\4\u50cc\4\u5120\4\u5174\4\u51c8\4\u521c"+
-    "\4\u5270\4\u52c4\4\u5318\4\u536c\4\u53c0\4\u5414\4\u5468\4\u54bc"+
-    "\4\u5510\4\u5564\4\u55b8\4\u560c\4\u5660\4\u56b4\4\u5708\4\u575c"+
-    "\4\u57b0\4\u5804\4\u5858\4\u58ac\4\u5900\4\u5954\4\u59a8\4\u59fc"+
-    "\4\u5a50\4\u5aa4\4\u5af8\4\u5b4c\4\u5ba0\4\u5bf4\4\u5c48\4\u5c9c"+
-    "\4\u5cf0\4\u5d44\4\u5d98\4\u5dec\4\u5e40\4\u5e94\4\u5ee8\4\u5f3c"+
-    "\4\u5f90\4\u5fe4\4\u6038\4\u608c\4\u60e0\4\u6134\4\u6188\4\u61dc"+
-    "\4\u6230\4\u6284\4\u62d8\4\u632c\4\u6380\4\u63d4\4\u6428\4\u647c"+
-    "\4\u64d0\4\u6524\4\u6578\4\u65cc\4\u6620\4\u6674\4\u66c8\4\u671c"+
-    "\4\u6770\4\u67c4\4\u6818\4\u686c\4\u68c0\4\u6914\4\u6968\4\u69bc"+
-    "\4\u6a10\4\u6a64\4\u6ab8\4\u6b0c\4\u6b60\4\u6bb4\4\u6c08\4\u6c5c"+
-    "\4\u6cb0\4\u6d04\4\u6d58\4\u6dac\4\u6e00\4\u6e54\4\u6ea8\4\u6efc"+
-    "\4\u6f50\4\u6fa4\4\u6ff8\4\u704c\4\u70a0\4\u70f4\4\u7148\4\u719c"+
-    "\4\u71f0\4\u7244\4\u7298\4\u72ec\0\u039c\0\u0ad4\0\u0c78\0\u19ec"+
-    "\0\u1a40\0\u1a94\0\u1ae8\0\u1b3c\0\u1b90\0\u1be4\0\u1c38\0\u1c8c"+
-    "\0\u1ce0\0\u1d34\0\u1d88\0\u1ddc\0\u1e30\0\u1e84\0\u1ed8\0\u1f2c"+
-    "\0\u1f80\0\u1fd4\0\u2028\0\u207c\0\u20d0\0\u2124\0\u2178\0\u21cc"+
-    "\0\u2220\4\u7340\4\u7394\4\u73e8\4\u743c\4\u7490\4\u74e4\4\u7538"+
-    "\4\u758c\4\u75e0\4\u7634\4\u7688\1\ud880\4\u76dc\4\u7730\4\u7784"+
-    "\4\u77d8\4\u782c\4\u7880\4\u78d4\4\u7928\4\u797c\4\u79d0\4\u7a24"+
-    "\1\uccb0\4\u7a78\4\u7acc\4\u7b20\4\u7b74\4\u7bc8\4\u7c1c\4\u7c70"+
-    "\4\u7cc4\4\u7d18\4\u7d6c\4\u7dc0\4\u7e14\4\u7e68\4\u7ebc\4\u7f10"+
-    "\4\u7f64\4\u7fb8\4\u800c\1\uaed4\4\u8060\2\u298c\1\ucdac\4\u80b4"+
-    "\4\u8108\4\u815c\4\u81b0\2\u2b30\4\u8204\4\u8258\4\u82ac\4\u8300"+
-    "\4\u8354\4\u83a8\4\u83fc\4\u8450\4\u84a4\4\u84f8\1\ua4fc\4\u854c"+
-    "\4\u85a0\4\u85f4\4\u8648\4\u869c\4\u86f0\4\u8744\4\u8798\4\u87ec"+
-    "\4\u8840\4\u8894\4\u88e8\4\u893c\4\u8990\4\u89e4\4\u8a38\4\u8a8c"+
-    "\4\u8ae0\4\u8b34\4\u8b88\4\u8bdc\4\u8c30\1\ue300\4\u8c84\4\u8cd8"+
-    "\4\u8d2c\4\u8d80\4\u8dd4\4\u8e28\4\u8e7c\4\u8ed0\4\u8f24\4\u8f78"+
-    "\4\u8fcc\4\u9020\4\u9074\4\u90c8\2\64\4\u911c\4\u9170\4\u91c4"+
-    "\4\u9218\4\u926c\4\u92c0\4\u9314\4\u9368\4\u93bc\4\u9410\4\u9464"+
-    "\4\u94b8\4\u950c\4\u9560\4\u95b4\1\ufcec\4\u9608\4\u965c\4\u96b0"+
-    "\4\u9704\4\u9758\4\u97ac\4\u9800\4\u9854\4\u98a8\4\u98fc\4\u9950"+
-    "\4\u99a4\4\u99f8\4\u9a4c\4\u9aa0\4\u9af4\4\u9b48\4\u9b9c\4\u9bf0"+
-    "\4\u9c44\4\u9c98\4\u9cec\4\u9d40\4\u9d94\4\u9de8\4\u9e3c\4\u9e90"+
-    "\1\uc674\2\u08bc\4\u9ee4\4\u9f38\4\u9f8c\4\u9fe0\4\ua034\0\u8f64"+
-    "\4\ua088\2\u07c0\4\ua0dc\4\ua130\4\ua184\4\ua1d8\1\ue108\4\ua22c"+
-    "\4\ua280\4\ua2d4\4\ua328\4\ua37c\4\ua3d0\4\ua424\4\ua478\4\ua4cc"+
-    "\4\ua520\0\u5010\4\ua574\4\ua5c8\4\ua61c\4\ua670\4\ua6c4\4\ua718"+
-    "\2\u2794\4\ua76c\4\ua7c0\4\ua814\4\ua868\1\uc4d0\4\ua8bc\4\ua910"+
-    "\4\ua964\4\ua9b8\4\uaa0c\4\uaa60\4\uaab4\4\uab08\4\uab5c\4\uabb0"+
-    "\4\uac04\4\uac58\4\uacac\4\uad00\4\uad54\4\uada8\4\uadfc\4\uae50"+
-    "\4\uaea4\4\uaef8\4\uaf4c\4\uafa0\4\uaff4\4\ub048\4\ub09c\4\ub0f0"+
-    "\4\ub144\1\ubb4c\4\ub198\4\ub1ec\4\ub240\1\uf218\1\ub1c8\4\ub294"+
-    "\4\ub2e8\4\ub33c\4\ub390\4\ub3e4\4\ub438\4\ub48c\4\ub4e0\4\ub534"+
-    "\4\ub588\4\ub5dc\4\ub630\4\ub684\4\ub6d8\4\ub72c\4\ub780\4\ub7d4"+
-    "\4\ub828\4\ub87c\4\ub8d0\4\ub924\4\ub978\1\ue6f0\4\ub9cc\4\uba20"+
-    "\4\uba74\4\ubac8\4\ubb1c\4\ubb70\4\ubbc4\4\ubc18\4\ubc6c\4\ubcc0"+
-    "\4\ubd14\4\ubd68\4\ubdbc\4\ube10\4\ube64\4\ubeb8\4\ubf0c\4\ubf60"+
-    "\4\ubfb4\4\uc008\4\uc05c\4\uc0b0\4\uc104\4\uc158\4\uc1ac\4\uc200"+
-    "\4\uc254\4\uc2a8\4\uc2fc\4\uc350\4\uc3a4\4\uc3f8\4\uc44c\4\uc4a0"+
-    "\4\uc4f4\4\uc548\4\uc59c\4\uc5f0\4\uc644\4\uc698\4\uc6ec\4\uc740"+
-    "\4\uc794\4\uc7e8\4\uc83c\4\uc890\4\uc8e4\4\uc938\4\uc98c\4\uc9e0"+
-    "\4\uca34\4\uca88\1\ue258\4\ucadc\4\ucb30\4\ucb84\4\ucbd8\4\ucc2c"+
-    "\4\ucc80\4\uccd4\4\ucd28\1\ucd04\4\ucd7c\4\ucdd0\4\uce24\4\uce78"+
-    "\4\ucecc\4\ucf20\4\uaf4c\4\ucf74\4\ucfc8\4\ud01c\0\u7914\4\ud070"+
-    "\4\ud0c4\4\ud118\4\ud16c\4\ud1c0\4\ud214\4\ud268\4\ud2bc\4\ud310"+
-    "\4\ud364\4\ud3b8\4\ud40c\4\ud460\4\ud4b4\4\ud508\4\ud55c\4\ud5b0"+
-    "\1\uafd0\4\ud604\4\ud658\4\ud6ac\4\ud700\4\ud754\4\ud7a8\4\ud7fc"+
-    "\4\ud850\4\ud8a4\4\ud8f8\4\ud94c\4\ud9a0\4\ud9f4\4\uda48\4\uda9c"+
-    "\4\udaf0\4\udb44\4\udb98\4\udbec\4\udc40\4\udc94\4\udce8\4\udd3c"+
-    "\4\udd90\4\udde4\4\ude38\4\ude8c\1\ufd40\4\udee0\4\udf34\4\udf88"+
-    "\4\udfdc\4\u8d80\4\ue030\4\ue084\4\ue0d8\4\ue12c\4\uab08\4\ue180"+
-    "\4\ue1d4\4\ue228\4\ue27c\4\ue2d0\1\ub414\4\ue324\4\ue378\4\ue3cc"+
-    "\4\ue420\4\ue474\4\ue4c8\4\ue51c\4\ue570\4\ue5c4\4\ue618\2\u1c6c"+
-    "\4\ue66c\4\ue6c0\4\ub588\4\ue714\4\ue768\4\ue7bc\4\ue810\4\ue864"+
-    "\4\ue8b8\4\ue90c\4\ue960\4\ue9b4\4\uea08\2\u1294\4\uea5c\4\ueab0"+
-    "\4\ueb04\4\ueb58\4\uebac\4\uec00\4\uec54\4\ueca8\0\u2514\0\u2568"+
-    "\0\u25bc\0\u2610\0\u2664\0\u26b8\0\u270c\0\u2760\0\u27b4\0\u2808"+
-    "\0\u285c\0\u28b0\0\u2904\0\u2958\0\u29ac\0\u2a00\0\u2a54\0\u2aa8"+
-    "\0\u2afc\0\u2b50\0\u2ba4\0\u2bf8\0\u2c4c\0\u2ca0\0\u2cf4\0\u2d48"+
-    "\4\uecfc\2\u7600\4\ued50\4\ueda4\4\uedf8\4\uee4c\4\ueea0\4\ueef4"+
-    "\4\uef48\4\uef9c\4\ueff0\4\uf044\4\uf098\2\u6a30\4\uf0ec\4\uf140"+
-    "\4\uf194\4\uf1e8\4\uf23c\4\uf290\4\uf2e4\4\uf338\4\uf38c\4\uf3e0"+
-    "\4\uf434\4\uf488\4\uf4dc\4\uf530\4\uf584\4\uf5d8\4\uf62c\4\uf680"+
-    "\2\u4c54\4\uf6d4\2\uc70c\2\u6b2c\4\uf728\4\uf77c\4\uf7d0\4\uf824"+
-    "\2\uc8b0\4\uf878\4\uf8cc\4\uf920\4\uf974\4\uf9c8\4\ufa1c\4\ufa70"+
-    "\4\ufac4\4\ufb18\4\ufb6c\2\u4378\4\ufbc0\4\ufc14\4\ufc68\4\ufcbc"+
-    "\4\ufd10\4\ufd64\4\ufdb8\4\ufe0c\4\ufe60\4\ufeb4\4\uff08\4\uff5c"+
-    "\4\uffb0\5\4\5\130\5\254\5\u0100\5\u0154\5\u01a8\5\u01fc"+
-    "\5\u0250\5\u02a4\2\u8080\5\u02f8\5\u034c\5\u03a0\5\u03f4\5\u0448"+
-    "\5\u049c\5\u04f0\5\u0544\5\u0598\5\u05ec\5\u0640\5\u0694\5\u06e8"+
-    "\5\u073c\2\u9db4\5\u0790\5\u07e4\5\u0838\5\u088c\5\u08e0\5\u0934"+
-    "\5\u0988\5\u09dc\5\u0a30\5\u0a84\5\u0ad8\5\u0b2c\5\u0b80\5\u0bd4"+
-    "\5\u0c28\2\u9a6c\5\u0c7c\5\u0cd0\5\u0d24\5\u0d78\5\u0dcc\5\u0e20"+
-    "\5\u0e74\5\u0ec8\5\u0f1c\5\u0f70\5\u0fc4\5\u1018\5\u106c\5\u10c0"+
-    "\5\u1114\5\u1168\5\u11bc\5\u1210\5\u1264\5\u12b8\5\u130c\5\u1360"+
-    "\5\u13b4\5\u1408\5\u145c\5\u14b0\5\u1504\2\u63f4\2\ua63c\5\u1558"+
-    "\5\u15ac\5\u1600\5\u1654\5\u16a8\0\ue3b8\5\u16fc\2\ua540\5\u1750"+
-    "\5\u17a4\5\u17f8\5\u184c\2\u7e88\5\u18a0\5\u18f4\5\u1948\5\u199c"+
-    "\5\u19f0\5\u1a44\5\u1a98\5\u1aec\5\u1b40\5\u1b94\0\ua464\5\u1be8"+
-    "\5\u1c3c\5\u1c90\5\u1ce4\5\u1d38\5\u1d8c\2\uc514\5\u1de0\5\u1e34"+
-    "\5\u1e88\5\u1edc\2\u6250\5\u1f30\5\u1f84\5\u1fd8\5\u202c\5\u2080"+
-    "\5\u20d4\5\u2128\5\u217c\5\u21d0\5\u2224\5\u2278\5\u22cc\5\u2320"+
-    "\5\u2374\5\u23c8\5\u241c\5\u2470\5\u24c4\5\u2518\5\u256c\5\u25c0"+
-    "\5\u2614\5\u2668\5\u26bc\5\u2710\5\u2764\5\u27b8\2\u58cc\5\u280c"+
-    "\5\u2860\5\u28b4\2\u8f98\2\u4f48\5\u2908\5\u295c\5\u29b0\5\u2a04"+
-    "\5\u2a58\5\u2aac\5\u2b00\5\u2b54\5\u2ba8\5\u2bfc\5\u2c50\5\u2ca4"+
-    "\5\u2cf8\5\u2d4c\5\u2da0\5\u2df4\5\u2e48\5\u2e9c\5\u2ef0\5\u2f44"+
-    "\5\u2f98\5\u2fec\2\u8470\5\u3040\5\u3094\5\u30e8\5\u313c\5\u3190"+
-    "\5\u31e4\5\u3238\5\u328c\5\u32e0\5\u3334\5\u3388\5\u33dc\5\u3430"+
-    "\5\u3484\5\u34d8\5\u352c\5\u3580\5\u35d4\5\u3628\5\u367c\5\u36d0"+
-    "\5\u3724\5\u3778\5\u37cc\5\u3820\5\u3874\5\u38c8\5\u391c\5\u3970"+
-    "\5\u39c4\5\u3a18\5\u3a6c\5\u3ac0\5\u3b14\5\u3b68\5\u3bbc\5\u3c10"+
-    "\5\u3c64\5\u3cb8\5\u3d0c\5\u3d60\5\u3db4\5\u3e08\5\u3e5c\5\u3eb0"+
-    "\5\u3f04\5\u3f58\5\u3fac\5\u4000\5\u4054\5\u40a8\5\u40fc\2\u7fd8"+
-    "\5\u4150\5\u41a4\5\u41f8\5\u424c\5\u42a0\5\u42f4\5\u4348\5\u439c"+
-    "\2\u6a84\5\u43f0\5\u4444\5\u4498\5\u44ec\5\u4540\5\u4594\5\u25c0"+
-    "\5\u45e8\5\u463c\5\u4690\0\ucd68\5\u46e4\5\u4738\5\u478c\5\u47e0"+
-    "\5\u4834\5\u4888\5\u48dc\5\u4930\5\u4984\5\u49d8\5\u4a2c\5\u4a80"+
-    "\5\u4ad4\5\u4b28\5\u4b7c\5\u4bd0\5\u4c24\2\u4d50\5\u4c78\5\u4ccc"+
-    "\5\u4d20\5\u4d74\5\u4dc8\5\u4e1c\5\u4e70\5\u4ec4\5\u4f18\5\u4f6c"+
-    "\5\u4fc0\5\u5014\5\u5068\5\u50bc\5\u5110\5\u5164\5\u51b8\5\u520c"+
-    "\5\u5260\5\u52b4\5\u5308\5\u535c\5\u53b0\5\u5404\5\u5458\5\u54ac"+
-    "\5\u5500\2\u9ac0\5\u5554\5\u55a8\5\u55fc\5\u5650\5\u03f4\5\u56a4"+
-    "\5\u56f8\5\u574c\5\u57a0\5\u217c\5\u57f4\5\u5848\5\u589c\5\u58f0"+
-    "\5\u5944\2\u5194\5\u5998\5\u59ec\5\u5a40\5\u5a94\5\u5ae8\5\u5b3c"+
-    "\5\u5b90\5\u5be4\5\u5c38\5\u5c8c\2\ub9ec\5\u5ce0\5\u2bfc\5\u5d34"+
-    "\5\u5d88\5\u5ddc\5\u5e30\5\u5e84\5\u5ed8\5\u5f2c\5\u5f80\5\u5fd4"+
-    "\5\u6028\2\ub014\5\u607c\5\u60d0\5\u6124\5\u6178\5\u61cc\5\u6220"+
-    "\5\u6274\5\u62c8\5\u631c\5\u6370\5\u63c4\5\u6418\5\u646c\5\u64c0"+
-    "\5\u6514\5\u6568\5\u65bc\5\u6610\5\u6664\5\u66b8\5\u670c\5\u6760"+
-    "\5\u67b4\5\u6808\5\u685c\5\u68b0\5\u6904\5\u6958\5\u69ac\5\u6a00"+
-    "\5\u6a54\5\u6aa8\5\u6afc\5\u6b50\5\u6ba4\5\u6bf8\5\u6c4c\5\u6ca0"+
-    "\5\u6cf4\5\u6d48\5\u6d9c\5\u6df0\5\u6e44\5\u6e98\5\u6eec\5\u6f40"+
-    "\5\u6f94\5\u6fe8\5\u703c\5\u7090\5\u70e4\5\u7138\5\u718c\5\u71e0"+
-    "\5\u7234\5\u7288\5\u72dc\5\u7330\5\u7384\5\u73d8\5\u742c\5\u7480"+
-    "\5\u74d4\5\u7528\5\u757c\5\u75d0\5\u7624\5\u7678\5\u76cc\5\u7720"+
-    "\5\u7774\5\u77c8\5\u781c\5\u7870\5\u78c4\5\u7918\5\u796c\5\u79c0"+
-    "\5\u7a14\5\u7a68\5\u7abc\5\u7b10\5\u7b64\5\u7bb8\5\u7c0c\5\u7c60"+
-    "\5\u7cb4\5\u7d08\5\u7d5c\5\u7db0\5\u7e04\5\u7e58\5\u7eac\5\u7f00"+
-    "\5\u7f54\5\u7fa8\5\u7ffc\5\u8050\5\u80a4\5\u80f8\5\u814c\5\u81a0"+
-    "\5\u81f4\5\u8248\5\u829c\5\u82f0\5\u8344\5\u8398\5\u83ec\5\u8440"+
-    "\5\u8494\5\u84e8\5\u853c\5\u8590\5\u85e4\5\u8638\5\u868c\5\u86e0"+
-    "\5\u8734\5\u8788\5\u87dc\5\u8830\5\u8884\5\u88d8\5\u892c\5\u8980"+
-    "\5\u89d4\5\u8a28\5\u8a7c\5\u8ad0\5\u8b24\5\u8b78\5\u8bcc\5\u8c20"+
-    "\5\u8c74\5\u8cc8\5\u8d1c\5\u8d70\5\u8dc4\5\u8e18\5\u8e6c\5\u8ec0"+
-    "\5\u8f14\5\u8f68\5\u8fbc\5\u9010\5\u9064\5\u90b8\5\u910c\5\u9160"+
-    "\5\u91b4\5\u9208\5\u925c\5\u92b0\5\u9304\5\u9358\5\u93ac\5\u9400"+
-    "\5\u9454\5\u94a8\5\u94fc\5\u9550\5\u95a4\5\u95f8\5\u964c\5\u96a0"+
-    "\5\u96f4\5\u9748\5\u979c\5\u97f0\5\u9844\5\u9898\5\u98ec\5\u9940"+
-    "\5\u9994\5\u99e8\5\u9a3c\5\u9a90\5\u9ae4\5\u9b38\5\u9b8c\5\u9be0"+
-    "\5\u9c34\5\u9c88\5\u9cdc\5\u9d30\5\u9d84\5\u9dd8\5\u9e2c\5\u9e80"+
-    "\5\u9ed4\5\u9f28\5\u9f7c\5\u9fd0\5\ua024\5\ua078\5\ua0cc\5\ua120"+
-    "\5\ua174\5\ua1c8\5\ua21c\5\ua270\5\ua2c4\5\ua318\5\ua36c\5\ua3c0"+
-    "\5\ua414\5\ua468\5\ua4bc\5\ua510\5\ua564\5\ua5b8\5\ua60c\5\ua660"+
-    "\5\ua6b4\5\ua708\5\ua75c\5\ua7b0\5\ua804\5\ua858\5\ua8ac\5\ua900"+
-    "\5\ua954\5\ua9a8\5\ua9fc\5\uaa50\5\uaaa4\5\uaaf8\5\uab4c\5\uaba0"+
-    "\5\uabf4\5\uac48\5\uac9c\5\uacf0\5\uad44\5\uad98\5\uadec\5\uae40"+
-    "\5\uae94\5\uaee8\5\uaf3c\5\uaf90\5\uafe4\5\ub038\5\ub08c\5\ub0e0"+
-    "\5\ub134\5\ub188\5\ub1dc\5\ub230\5\ub284\5\ub2d8\5\ub32c\5\ub380"+
-    "\5\ub3d4\5\ub428\5\ub47c\5\ub4d0\5\ub524\5\ub578\5\ub5cc\5\ub620"+
-    "\5\ub674\5\ub6c8\5\ub71c\5\ub770\5\ub7c4\5\ub818\5\ub86c\5\ub8c0"+
-    "\5\ub914\5\ub968\5\ub9bc\5\uba10\5\uba64\5\ubab8\5\ubb0c\5\ubb60"+
-    "\5\ubbb4\5\ubc08\5\ubc5c\5\ubcb0\5\ubd04\5\ubd58\5\ubdac\5\ube00"+
-    "\5\ube54\5\ubea8\5\ubefc\5\ubf50\5\ubfa4\5\ubff8\5\uc04c\5\uc0a0"+
-    "\5\uc0f4\5\uc148\5\uc19c\5\uc1f0\5\uc244\5\uc298\5\uc2ec\5\uc340"+
-    "\5\uc394\5\uc3e8\5\uc43c\5\uc490\5\uc4e4\5\uc538\5\uc58c\5\uc5e0"+
-    "\5\uc634\5\uc688\5\uc6dc\5\uc730\5\uc784\5\uc7d8\5\uc82c\5\uc880"+
-    "\5\uc8d4\5\uc928\5\uc97c\5\uc9d0\5\uca24\5\uca78\5\ucacc\5\ucb20"+
-    "\5\ucb74\5\ucbc8\5\ucc1c\5\ucc70\5\uccc4\5\ucd18\5\ucd6c\5\ucdc0"+
-    "\5\uce14\5\uce68\5\ucebc\5\ucf10\5\ucf64\5\ucfb8\5\ud00c\5\ud060"+
-    "\5\ud0b4\5\ud108\5\ud15c\5\ud1b0\5\ud204\5\ud258\5\ud2ac\5\ud300"+
-    "\5\ud354\5\ud3a8\5\ud3fc\5\ud450\5\ud4a4\5\ud4f8\5\ud54c\5\ud5a0"+
-    "\5\ud5f4\5\ud648\5\ud69c\5\ud6f0\5\ud744\5\ud798\5\ud7ec\5\ud840"+
-    "\5\ud894\5\ud8e8\5\ud93c\5\ud990\5\ud9e4\5\uda38\5\uda8c\5\udae0"+
-    "\5\udb34\5\udb88\5\udbdc\5\udc30\5\udc84\5\udcd8\5\udd2c\5\udd80"+
-    "\5\uddd4\5\ude28\5\ude7c\5\uded0\5\udf24\5\udf78\5\udfcc\5\ue020"+
-    "\5\ue074\5\ue0c8\5\ue11c\5\ue170\5\ue1c4\5\ue218\5\ue26c\5\ue2c0"+
-    "\5\ue314\5\ue368\5\ue3bc\5\ue410\5\ue464\5\ue4b8\5\ue50c\5\ue560"+
-    "\5\ue5b4\5\ue608\5\ue65c\5\ue6b0\5\ue704\5\ue758\5\ue7ac\5\ue800"+
-    "\5\ue854\5\ue8a8\5\ue8fc\5\ue950\5\ue9a4\5\ue9f8\5\uea4c\5\ueaa0"+
-    "\5\ueaf4\5\ueb48\5\ueb9c\5\uebf0\5\uec44\5\uec98\5\uecec\5\ued40"+
-    "\5\ued94\5\uede8\5\uee3c\5\uee90\5\ueee4\5\uef38\5\uef8c\5\uefe0"+
-    "\5\uf034\5\uf088\5\uf0dc\5\uf130\5\uf184\5\uf1d8\5\uf22c\5\uf280"+
-    "\5\uf2d4\5\uf328\5\uf37c\5\uf3d0\5\uf424\5\uf478\5\uf4cc\5\uf520"+
-    "\5\uf574\5\uf5c8\5\uf61c\5\uf670\5\uf6c4\5\uf718\5\uf76c\5\uf7c0"+
-    "\5\uf814\5\uf868\5\uf8bc\5\uf910\5\uf964\5\uf9b8\5\ufa0c\5\ufa60"+
-    "\5\ufab4\5\ufb08\5\ufb5c\5\ufbb0\5\ufc04\5\ufc58\5\ufcac\5\ufd00"+
-    "\5\ufd54\5\ufda8\5\ufdfc\5\ufe50\5\ufea4\5\ufef8\5\uff4c\5\uffa0"+
-    "\5\ufff4\6\110\6\234\6\360\6\u0144\6\u0198\6\u01ec\6\u0240"+
-    "\6\u0294\6\u02e8\6\u033c\6\u0390\6\u03e4\6\u0438\6\u048c\6\u04e0"+
-    "\6\u0534\6\u0588\6\u05dc\6\u0630\6\u0684\6\u06d8\6\u072c\6\u0780"+
-    "\6\u07d4\6\u0828\6\u087c\6\u08d0\6\u0924\6\u0978\6\u09cc\6\u0a20"+
-    "\6\u0a74\6\u0ac8\6\u0b1c\6\u0b70\6\u0bc4\6\u0c18\6\u0c6c\6\u0cc0"+
-    "\6\u0d14\6\u0d68\6\u0dbc\6\u0e10\6\u0e64\6\u0eb8\6\u0f0c\6\u0f60"+
-    "\6\u0fb4\6\u1008\6\u105c\6\u10b0\6\u1104\6\u1158\6\u11ac\6\u1200"+
-    "\6\u1254\6\u12a8\6\u12fc\6\u1350\6\u13a4\6\u13f8\6\u144c\6\u14a0"+
-    "\6\u14f4\6\u1548\6\u159c\6\u15f0\6\u1644\6\u1698\6\u16ec\6\u1740"+
-    "\6\u1794\6\u17e8\6\u183c\6\u1890\6\u18e4\6\u1938\6\u198c\6\u19e0"+
-    "\6\u1a34\6\u1a88\6\u1adc\6\u1b30\6\u1b84\6\u1bd8\6\u1c2c\6\u1c80"+
-    "\6\u1cd4\6\u1d28\6\u1d7c\6\u1dd0\6\u1e24\6\u1e78\6\u1ecc\6\u1f20"+
-    "\6\u1f74\6\u1fc8\6\u201c\6\u2070\6\u20c4\6\u2118\6\u216c\6\u21c0"+
-    "\6\u2214\6\u2268\6\u22bc\6\u2310\6\u2364\6\u23b8\6\u240c\6\u2460"+
-    "\6\u24b4\6\u2508\6\u255c\6\u25b0\6\u2604\6\u2658\6\u26ac\6\u2700"+
-    "\6\u2754\6\u27a8\6\u27fc\6\u2850\6\u28a4\6\u28f8\6\u294c\6\u29a0"+
-    "\6\u29f4\6\u2a48\6\u2a9c\6\u2af0\6\u2b44\6\u2b98\6\u2bec\6\u2c40"+
-    "\6\u2c94\6\u2ce8\6\u2d3c\6\u2d90\6\u2de4\6\u2e38\6\u2e8c\6\u2ee0"+
-    "\6\u2f34\6\u2f88\6\u2fdc\6\u3030\6\u3084\6\u30d8\6\u312c\6\u3180"+
-    "\6\u31d4\6\u3228\6\u327c\6\u32d0\6\u3324\6\u3378\6\u33cc\6\u3420"+
-    "\6\u3474\6\u34c8\6\u351c\6\u3570\6\u35c4\6\u3618\6\u366c\6\u36c0"+
-    "\6\u3714\6\u3768\6\u37bc\6\u3810\6\u3864\6\u38b8\6\u390c\6\u3960"+
-    "\6\u39b4\6\u3a08\6\u3a5c\6\u3ab0\6\u3b04\6\u3b58\6\u3bac\6\u3c00"+
-    "\6\u3c54\6\u3ca8\6\u3cfc\6\u3d50\6\u3da4\6\u3df8\6\u3e4c\6\u3ea0"+
-    "\6\u3ef4\6\u3f48\6\u3f9c\6\u3ff0\6\u4044\6\u4098\6\u40ec\6\u4140"+
-    "\6\u4194\6\u41e8\6\u423c\6\u4290\6\u42e4\6\u4338\6\u438c\6\u43e0"+
-    "\6\u4434\6\u4488\6\u44dc\6\u4530\6\u4584\6\u45d8\6\u462c\6\u4680"+
-    "\6\u46d4\6\u4728\6\u477c\6\u47d0\6\u4824\6\u4878\6\u48cc\6\u4920"+
-    "\

<TRUNCATED>

[05/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
index 8b288c2..a2ad394 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
@@ -65,147 +66,212 @@ public final class StandardTokenizerImpl {
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\42\0\1\15\4\0\1\14\4\0\1\7\1\0\1\10\1\0\12\4"+
-    "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\57\0\1\1"+
-    "\2\0\1\3\7\0\1\1\1\0\1\6\2\0\1\1\5\0\27\1"+
-    "\1\0\37\1\1\0\u01ca\1\4\0\14\1\5\0\1\6\10\0\5\1"+
-    "\7\0\1\1\1\0\1\1\21\0\160\3\5\1\1\0\2\1\2\0"+
-    "\4\1\1\7\7\0\1\1\1\6\3\1\1\0\1\1\1\0\24\1"+
-    "\1\0\123\1\1\0\213\1\1\0\7\3\236\1\11\0\46\1\2\0"+
-    "\1\1\7\0\47\1\1\0\1\7\7\0\55\3\1\0\1\3\1\0"+
-    "\2\3\1\0\2\3\1\0\1\3\10\0\33\16\5\0\3\16\1\1"+
-    "\1\6\13\0\5\3\7\0\2\7\2\0\13\3\1\0\1\3\3\0"+
-    "\53\1\25\3\12\4\1\0\1\4\1\7\1\0\2\1\1\3\143\1"+
-    "\1\0\1\1\10\3\1\0\6\3\2\1\2\3\1\0\4\3\2\1"+
-    "\12\4\3\1\2\0\1\1\17\0\1\3\1\1\1\3\36\1\33\3"+
-    "\2\0\131\1\13\3\1\1\16\0\12\4\41\1\11\3\2\1\2\0"+
-    "\1\7\1\0\1\1\5\0\26\1\4\3\1\1\11\3\1\1\3\3"+
-    "\1\1\5\3\22\0\31\1\3\3\104\0\1\1\1\0\13\1\67\0"+
-    "\33\3\1\0\4\3\66\1\3\3\1\1\22\3\1\1\7\3\12\1"+
-    "\2\3\2\0\12\4\1\0\7\1\1\0\7\1\1\0\3\3\1\0"+
-    "\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0"+
-    "\4\1\2\0\1\3\1\1\7\3\2\0\2\3\2\0\3\3\1\1"+
-    "\10\0\1\3\4\0\2\1\1\0\3\1\2\3\2\0\12\4\2\1"+
-    "\17\0\3\3\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1"+
-    "\1\0\2\1\1\0\2\1\1\0\2\1\2\0\1\3\1\0\5\3"+
-    "\4\0\2\3\2\0\3\3\3\0\1\3\7\0\4\1\1\0\1\1"+
-    "\7\0\12\4\2\3\3\1\1\3\13\0\3\3\1\0\11\1\1\0"+
-    "\3\1\1\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0"+
-    "\1\3\1\1\10\3\1\0\3\3\1\0\3\3\2\0\1\1\17\0"+
-    "\2\1\2\3\2\0\12\4\21\0\3\3\1\0\10\1\2\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\3"+
-    "\1\1\7\3\2\0\2\3\2\0\3\3\10\0\2\3\4\0\2\1"+
-    "\1\0\3\1\2\3\2\0\12\4\1\0\1\1\20\0\1\3\1\1"+
-    "\1\0\6\1\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1"+
-    "\1\0\2\1\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\3"+
-    "\3\0\3\3\1\0\4\3\2\0\1\1\6\0\1\3\16\0\12\4"+
-    "\21\0\3\3\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1"+
-    "\1\0\5\1\3\0\1\1\7\3\1\0\3\3\1\0\4\3\7\0"+
-    "\2\3\1\0\2\1\6\0\2\1\2\3\2\0\12\4\22\0\2\3"+
-    "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+
-    "\2\0\1\3\1\1\7\3\1\0\3\3\1\0\4\3\7\0\2\3"+
-    "\7\0\1\1\1\0\2\1\2\3\2\0\12\4\1\0\2\1\17\0"+
-    "\2\3\1\0\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\3"+
-    "\1\0\3\3\1\0\4\3\1\1\10\0\1\3\10\0\2\1\2\3"+
-    "\2\0\12\4\12\0\6\1\2\0\2\3\1\0\22\1\3\0\30\1"+
-    "\1\0\11\1\1\0\1\1\2\0\7\1\3\0\1\3\4\0\6\3"+
-    "\1\0\1\3\1\0\10\3\22\0\2\3\15\0\60\20\1\21\2\20"+
-    "\7\21\5\0\7\20\10\21\1\0\12\4\47\0\2\20\1\0\1\20"+
-    "\2\0\2\20\1\0\1\20\2\0\1\20\6\0\4\20\1\0\7\20"+
-    "\1\0\3\20\1\0\1\20\1\0\1\20\2\0\2\20\1\0\4\20"+
-    "\1\21\2\20\6\21\1\0\2\21\1\20\2\0\5\20\1\0\1\20"+
-    "\1\0\6\21\2\0\12\4\2\0\4\20\40\0\1\1\27\0\2\3"+
-    "\6\0\12\4\13\0\1\3\1\0\1\3\1\0\1\3\4\0\2\3"+
-    "\10\1\1\0\44\1\4\0\24\3\1\0\2\3\5\1\13\3\1\0"+
-    "\44\3\11\0\1\3\71\0\53\20\24\21\1\20\12\4\6\0\6\20"+
-    "\4\21\4\20\3\21\1\20\3\21\2\20\7\21\3\20\4\21\15\20"+
-    "\14\21\1\20\1\21\12\4\4\21\2\20\46\1\1\0\1\1\5\0"+
-    "\1\1\2\0\53\1\1\0\4\1\u0100\2\111\1\1\0\4\1\2\0"+
-    "\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0\4\1\2\0"+
-    "\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+
-    "\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0\3\3\40\0"+
-    "\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0\32\1\5\0"+
-    "\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\3\13\0\22\1"+
-    "\3\3\13\0\22\1\2\3\14\0\15\1\1\0\3\1\1\0\2\3"+
-    "\14\0\64\20\40\21\3\0\1\20\4\0\1\20\1\21\2\0\12\4"+
-    "\41\0\4\3\1\0\12\4\6\0\130\1\10\0\51\1\1\3\1\1"+
-    "\5\0\106\1\12\0\35\1\3\0\14\3\4\0\14\3\12\0\12\4"+
-    "\36\20\2\0\5\20\13\0\54\20\4\0\21\21\7\20\2\21\6\0"+
-    "\12\4\1\20\3\0\2\20\40\0\27\1\5\3\4\0\65\20\12\21"+
-    "\1\0\35\21\2\0\1\3\12\4\6\0\12\4\6\0\16\20\122\0"+
-    "\5\3\57\1\21\3\7\1\4\0\12\4\21\0\11\3\14\0\3\3"+
-    "\36\1\15\3\2\1\12\4\54\1\16\3\14\0\44\1\24\3\10\0"+
-    "\12\4\3\0\3\1\12\4\44\1\122\0\3\3\1\0\25\3\4\1"+
-    "\1\3\4\1\3\3\2\1\11\0\300\1\47\3\25\0\4\3\u0116\1"+
-    "\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1"+
-    "\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1"+
-    "\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1"+
-    "\4\0\15\1\5\0\3\1\1\0\7\1\17\0\4\3\10\0\2\10"+
-    "\12\0\1\10\2\0\1\6\2\0\5\3\20\0\2\11\3\0\1\7"+
-    "\17\0\1\11\13\0\5\3\1\0\12\3\1\0\1\1\15\0\1\1"+
-    "\20\0\15\1\63\0\41\3\21\0\1\1\4\0\1\1\2\0\12\1"+
-    "\1\0\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1"+
-    "\1\0\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1"+
-    "\21\0\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1"+
-    "\6\0\4\1\3\3\2\1\14\0\46\1\1\0\1\1\5\0\1\1"+
-    "\2\0\70\1\7\0\1\1\17\0\1\3\27\1\11\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\40\3\57\0\1\1\120\0\32\12\1\0"+
-    "\131\12\14\0\326\12\57\0\1\1\1\0\1\12\31\0\11\12\6\3"+
-    "\1\0\5\5\2\0\3\12\1\1\1\1\4\0\126\13\2\0\2\3"+
-    "\2\5\3\13\133\5\1\0\4\5\5\0\51\1\3\0\136\2\21\0"+
-    "\33\1\65\0\20\5\320\0\57\5\1\0\130\5\250\0\u19b6\12\112\0"+
-    "\u51cd\12\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\4"+
-    "\2\1\24\0\57\1\4\3\1\0\12\3\1\0\31\1\7\0\1\3"+
-    "\120\1\2\3\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
-    "\14\0\13\1\115\0\12\1\1\3\3\1\1\3\4\1\1\3\27\1"+
-    "\5\3\30\0\64\1\14\0\2\3\62\1\21\3\13\0\12\4\6\0"+
-    "\22\3\6\1\3\0\1\1\4\0\12\4\34\1\10\3\2\0\27\1"+
-    "\15\3\14\0\35\2\3\0\4\3\57\1\16\3\16\0\1\1\12\4"+
-    "\46\0\51\1\16\3\11\0\3\1\1\3\10\1\2\3\2\0\12\4"+
-    "\6\0\33\20\1\21\4\0\60\20\1\21\1\20\3\21\2\20\2\21"+
-    "\5\20\2\21\1\20\1\21\1\20\30\0\5\20\13\1\5\3\2\0"+
-    "\3\1\2\3\12\0\6\1\2\0\6\1\2\0\6\1\11\0\7\1"+
-    "\1\0\7\1\221\0\43\1\10\3\1\0\2\3\2\0\12\4\6\0"+
-    "\u2ba4\2\14\0\27\2\4\0\61\2\u2104\0\u016e\12\2\0\152\12\46\0"+
-    "\7\1\14\0\5\1\5\0\1\16\1\3\12\16\1\0\15\16\1\0"+
-    "\5\16\1\0\1\16\1\0\2\16\1\0\2\16\1\0\12\16\142\1"+
-    "\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\3"+
-    "\1\7\2\0\1\6\1\7\13\0\7\3\14\0\2\11\30\0\3\11"+
-    "\1\7\1\0\1\10\1\0\1\7\1\6\32\0\5\1\1\0\207\1"+
-    "\2\0\1\3\7\0\1\10\4\0\1\7\1\0\1\10\1\0\12\4"+
-    "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\13\0\70\5"+
-    "\2\3\37\2\3\0\6\2\2\0\6\2\2\0\6\2\2\0\3\2"+
-    "\34\0\3\3\4\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1"+
-    "\1\0\17\1\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\3"+
-    "\202\0\35\1\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1"+
-    "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\4"+
-    "\u0356\0\6\1\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1"+
-    "\2\0\27\1\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1"+
-    "\100\0\1\1\3\3\1\0\2\3\5\0\4\3\4\1\1\0\3\1"+
-    "\1\0\33\1\4\0\3\3\4\0\1\3\40\0\35\1\203\0\66\1"+
-    "\12\0\26\1\12\0\23\1\215\0\111\1\u03b7\0\3\3\65\1\17\3"+
-    "\37\0\12\4\20\0\3\3\55\1\13\3\2\0\1\3\22\0\31\1"+
-    "\7\0\12\4\6\0\3\3\44\1\16\3\1\0\12\4\100\0\3\3"+
-    "\60\1\16\3\4\1\13\0\12\4\u04a6\0\53\1\15\3\10\0\12\4"+
-    "\u0936\0\u036f\1\221\0\143\1\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1"+
-    "\13\0\1\1\56\3\20\0\4\3\15\1\u4060\0\1\5\1\13\u2163\0"+
-    "\5\3\3\0\26\3\2\0\7\3\36\0\4\3\224\0\3\3\u01bb\0"+
-    "\125\1\1\0\107\1\1\0\2\1\2\0\1\1\2\0\2\1\2\0"+
-    "\4\1\1\0\14\1\1\0\1\1\1\0\7\1\1\0\101\1\1\0"+
-    "\4\1\2\0\10\1\1\0\7\1\1\0\34\1\1\0\4\1\1\0"+
-    "\5\1\1\0\1\1\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0"+
-    "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
-    "\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0\10\1\2\0"+
-    "\62\4\u1600\0\4\1\1\0\33\1\1\0\2\1\1\0\1\1\2\0"+
-    "\1\1\1\0\12\1\1\0\4\1\1\0\1\1\1\0\1\1\6\0"+
-    "\1\1\4\0\1\1\1\0\1\1\1\0\1\1\1\0\3\1\1\0"+
-    "\2\1\1\0\1\1\2\0\1\1\1\0\1\1\1\0\1\1\1\0"+
-    "\1\1\1\0\1\1\1\0\2\1\1\0\1\1\2\0\4\1\1\0"+
-    "\7\1\1\0\4\1\1\0\4\1\1\0\1\1\1\0\12\1\1\0"+
-    "\21\1\5\0\3\1\1\0\5\1\1\0\21\1\u032a\0\32\17\1\13"+
-    "\u0dff\0\ua6d7\12\51\0\u1035\12\13\0\336\12\u3fe2\0\u021e\12\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
-    "\1\3\36\0\140\3\200\0\360\3\uffff\0\uffff\0\ufe12\0";
+    "\42\0\1\32\1\7\3\0\1\31\2\0\1\7\1\0\1\24\1\0"+
+    "\1\25\1\0\12\21\1\23\1\24\5\0\32\15\4\0\1\26\1\0"+
+    "\32\15\56\0\1\4\1\15\2\0\1\5\1\4\6\0\1\15\1\0"+
+    "\1\23\2\0\1\15\5\0\27\15\1\0\37\15\1\0\u01ca\15\4\0"+
+    "\14\15\5\0\1\23\10\0\5\15\7\0\1\15\1\0\1\15\21\0"+
+    "\160\5\5\15\1\0\2\15\2\0\4\15\1\24\1\15\6\0\1\15"+
+    "\1\23\3\15\1\0\1\15\1\0\24\15\1\0\123\15\1\0\213\15"+
+    "\1\0\7\5\246\15\1\0\46\15\2\0\1\15\7\0\47\15\1\0"+
+    "\1\24\7\0\55\5\1\0\1\5\1\0\2\5\1\0\2\5\1\0"+
+    "\1\5\10\0\33\33\5\0\3\33\1\15\1\23\13\0\6\5\6\0"+
+    "\2\24\2\0\13\5\1\0\1\5\3\0\53\15\25\5\12\20\1\0"+
+    "\1\20\1\24\1\0\2\15\1\5\143\15\1\0\1\15\10\5\1\0"+
+    "\6\5\2\15\2\5\1\0\4\5\2\15\12\20\3\15\2\0\1\15"+
+    "\17\0\1\5\1\15\1\5\36\15\33\5\2\0\131\15\13\5\1\15"+
+    "\16\0\12\20\41\15\11\5\2\15\2\0\1\24\1\0\1\15\5\0"+
+    "\26\15\4\5\1\15\11\5\1\15\3\5\1\15\5\5\22\0\31\15"+
+    "\3\5\104\0\25\15\1\0\10\15\26\0\60\5\66\15\3\5\1\15"+
+    "\22\5\1\15\7\5\12\15\2\5\2\0\12\20\1\0\20\15\3\5"+
+    "\1\0\10\15\2\0\2\15\2\0\26\15\1\0\7\15\1\0\1\15"+
+    "\3\0\4\15\2\0\1\5\1\15\7\5\2\0\2\5\2\0\3\5"+
+    "\1\15\10\0\1\5\4\0\2\15\1\0\3\15\2\5\2\0\12\20"+
+    "\2\15\17\0\3\5\1\0\6\15\4\0\2\15\2\0\26\15\1\0"+
+    "\7\15\1\0\2\15\1\0\2\15\1\0\2\15\2\0\1\5\1\0"+
+    "\5\5\4\0\2\5\2\0\3\5\3\0\1\5\7\0\4\15\1\0"+
+    "\1\15\7\0\12\20\2\5\3\15\1\5\13\0\3\5\1\0\11\15"+
+    "\1\0\3\15\1\0\26\15\1\0\7\15\1\0\2\15\1\0\5\15"+
+    "\2\0\1\5\1\15\10\5\1\0\3\5\1\0\3\5\2\0\1\15"+
+    "\17\0\2\15\2\5\2\0\12\20\11\0\1\15\7\0\3\5\1\0"+
+    "\10\15\2\0\2\15\2\0\26\15\1\0\7\15\1\0\2\15\1\0"+
+    "\5\15\2\0\1\5\1\15\7\5\2\0\2\5\2\0\3\5\10\0"+
+    "\2\5\4\0\2\15\1\0\3\15\2\5\2\0\12\20\1\0\1\15"+
+    "\20\0\1\5\1\15\1\0\6\15\3\0\3\15\1\0\4\15\3\0"+
+    "\2\15\1\0\1\15\1\0\2\15\3\0\2\15\3\0\3\15\3\0"+
+    "\14\15\4\0\5\5\3\0\3\5\1\0\4\5\2\0\1\15\6\0"+
+    "\1\5\16\0\12\20\20\0\4\5\1\0\10\15\1\0\3\15\1\0"+
+    "\27\15\1\0\20\15\3\0\1\15\7\5\1\0\3\5\1\0\4\5"+
+    "\7\0\2\5\1\0\3\15\5\0\2\15\2\5\2\0\12\20\20\0"+
+    "\1\15\3\5\1\0\10\15\1\0\3\15\1\0\27\15\1\0\12\15"+
+    "\1\0\5\15\2\0\1\5\1\15\7\5\1\0\3\5\1\0\4\5"+
+    "\7\0\2\5\7\0\1\15\1\0\2\15\2\5\2\0\12\20\1\0"+
+    "\2\15\16\0\3\5\1\0\10\15\1\0\3\15\1\0\51\15\2\0"+
+    "\1\15\7\5\1\0\3\5\1\0\4\5\1\15\5\0\3\15\1\5"+
+    "\7\0\3\15\2\5\2\0\12\20\12\0\6\15\2\0\2\5\1\0"+
+    "\22\15\3\0\30\15\1\0\11\15\1\0\1\15\2\0\7\15\3\0"+
+    "\1\5\4\0\6\5\1\0\1\5\1\0\10\5\6\0\12\20\2\0"+
+    "\2\5\15\0\60\34\1\35\2\34\7\35\5\0\7\34\10\35\1\0"+
+    "\12\20\47\0\2\34\1\0\1\34\2\0\2\34\1\0\1\34\2\0"+
+    "\1\34\6\0\4\34\1\0\7\34\1\0\3\34\1\0\1\34\1\0"+
+    "\1\34\2\0\2\34\1\0\4\34\1\35\2\34\6\35\1\0\2\35"+
+    "\1\34\2\0\5\34\1\0\1\34\1\0\6\35\2\0\12\20\2\0"+
+    "\4\34\40\0\1\15\27\0\2\5\6\0\12\20\13\0\1\5\1\0"+
+    "\1\5\1\0\1\5\4\0\2\5\10\15\1\0\44\15\4\0\24\5"+
+    "\1\0\2\5\5\15\13\5\1\0\44\5\11\0\1\5\71\0\53\34"+
+    "\24\35\1\34\12\20\6\0\6\34\4\35\4\34\3\35\1\34\3\35"+
+    "\2\34\7\35\3\34\4\35\15\34\14\35\1\34\1\35\12\20\4\35"+
+    "\2\34\46\15\1\0\1\15\5\0\1\15\2\0\53\15\1\0\4\15"+
+    "\u0100\17\111\15\1\0\4\15\2\0\7\15\1\0\1\15\1\0\4\15"+
+    "\2\0\51\15\1\0\4\15\2\0\41\15\1\0\4\15\2\0\7\15"+
+    "\1\0\1\15\1\0\4\15\2\0\17\15\1\0\71\15\1\0\4\15"+
+    "\2\0\103\15\2\0\3\5\40\0\20\15\20\0\126\15\2\0\6\15"+
+    "\3\0\u026c\15\2\0\21\15\1\0\32\15\5\0\113\15\3\0\13\15"+
+    "\7\0\15\15\1\0\4\15\3\5\13\0\22\15\3\5\13\0\22\15"+
+    "\2\5\14\0\15\15\1\0\3\15\1\0\2\5\14\0\64\34\40\35"+
+    "\3\0\1\34\4\0\1\34\1\35\2\0\12\20\41\0\4\5\1\0"+
+    "\12\20\6\0\130\15\10\0\5\15\2\5\42\15\1\5\1\15\5\0"+
+    "\106\15\12\0\37\15\1\0\14\5\4\0\14\5\12\0\12\20\36\34"+
+    "\2\0\5\34\13\0\54\34\4\0\32\34\6\0\12\20\1\34\3\0"+
+    "\2\34\40\0\27\15\5\5\4\0\65\34\12\35\1\0\35\35\2\0"+
+    "\1\5\12\20\6\0\12\20\6\0\16\34\2\0\17\5\101\0\5\5"+
+    "\57\15\21\5\7\15\4\0\12\20\21\0\11\5\14\0\3\5\36\15"+
+    "\15\5\2\15\12\20\54\15\16\5\14\0\44\15\24\5\10\0\12\20"+
+    "\3\0\3\15\12\20\44\15\2\0\11\15\107\0\3\5\1\0\25\5"+
+    "\4\15\1\5\4\15\3\5\2\15\1\0\2\5\6\0\300\15\66\5"+
+    "\5\0\5\5\u0116\15\2\0\6\15\2\0\46\15\2\0\6\15\2\0"+
+    "\10\15\1\0\1\15\1\0\1\15\1\0\1\15\1\0\37\15\2\0"+
+    "\65\15\1\0\7\15\1\0\1\15\3\0\3\15\1\0\7\15\3\0"+
+    "\4\15\2\0\6\15\4\0\15\15\5\0\3\15\1\0\7\15\17\0"+
+    "\1\5\1\12\2\5\10\0\2\25\12\0\1\25\2\0\1\23\2\0"+
+    "\5\5\1\26\14\0\1\4\2\0\2\26\3\0\1\24\4\0\1\4"+
+    "\12\0\1\26\13\0\5\5\1\0\12\5\1\0\1\15\15\0\1\15"+
+    "\20\0\15\15\63\0\23\5\1\10\15\5\21\0\1\15\4\0\1\15"+
+    "\2\0\12\15\1\0\1\15\3\0\5\15\4\0\1\4\1\0\1\15"+
+    "\1\0\1\15\1\0\1\15\1\0\4\15\1\0\12\15\1\16\2\0"+
+    "\4\15\5\0\5\15\4\0\1\15\21\0\51\15\13\0\6\4\17\0"+
+    "\2\4\u016f\0\2\4\14\0\1\4\137\0\1\4\106\0\1\4\31\0"+
+    "\13\4\4\0\3\4\273\0\14\15\1\16\47\15\300\0\2\4\12\0"+
+    "\1\4\11\0\1\4\72\0\4\4\1\0\5\4\1\4\1\0\7\4"+
+    "\1\4\2\4\1\4\1\4\1\0\2\4\2\4\1\4\4\4\1\3"+
+    "\2\4\1\4\1\4\2\4\2\4\1\4\3\4\1\4\3\4\2\4"+
+    "\10\4\3\4\5\4\1\4\1\4\1\4\5\4\14\4\13\4\2\4"+
+    "\2\4\1\4\1\4\2\4\1\4\1\4\22\4\1\4\2\4\2\4"+
+    "\6\4\12\0\2\4\6\4\1\4\1\4\1\4\2\4\3\4\2\4"+
+    "\10\4\2\4\4\4\2\4\13\4\2\4\5\4\2\4\2\4\1\4"+
+    "\5\4\2\4\1\4\1\4\1\4\2\4\24\4\2\4\5\4\6\4"+
+    "\1\4\2\4\1\3\1\4\2\4\1\4\4\4\1\4\2\4\1\4"+
+    "\2\0\2\4\4\3\1\4\1\4\2\4\1\4\1\0\1\4\1\0"+
+    "\1\4\6\0\1\4\3\0\1\4\6\0\1\4\12\0\2\4\17\0"+
+    "\1\4\2\0\1\4\4\0\1\4\1\0\1\4\4\0\3\4\1\0"+
+    "\1\4\13\0\2\4\3\4\55\0\3\4\11\0\1\4\16\0\1\4"+
+    "\16\0\1\4\u0174\0\2\4\u01cf\0\3\4\23\0\2\4\63\0\1\4"+
+    "\4\0\1\4\252\0\57\15\1\0\57\15\1\0\205\15\6\0\4\15"+
+    "\3\5\2\15\14\0\46\15\1\0\1\15\5\0\1\15\2\0\70\15"+
+    "\7\0\1\15\17\0\1\5\27\15\11\0\7\15\1\0\7\15\1\0"+
+    "\7\15\1\0\7\15\1\0\7\15\1\0\7\15\1\0\7\15\1\0"+
+    "\7\15\1\0\40\5\57\0\1\15\120\0\32\27\1\0\131\27\14\0"+
+    "\326\27\57\0\1\15\1\0\1\27\31\0\11\27\6\5\1\4\5\22"+
+    "\2\0\3\27\1\15\1\15\1\4\3\0\126\30\2\0\2\5\2\22"+
+    "\3\30\133\22\1\0\4\22\5\0\51\15\3\0\136\17\21\0\33\15"+
+    "\65\0\20\22\227\0\1\4\1\0\1\4\66\0\57\22\1\0\130\22"+
+    "\250\0\u19b6\27\112\0\u51d6\27\52\0\u048d\15\103\0\56\15\2\0\u010d\15"+
+    "\3\0\20\15\12\20\2\15\24\0\57\15\4\5\1\0\12\5\1\0"+
+    "\37\15\2\5\120\15\2\5\45\0\11\15\2\0\147\15\2\0\44\15"+
+    "\1\0\10\15\77\0\13\15\1\5\3\15\1\5\4\15\1\5\27\15"+
+    "\5\5\30\0\64\15\14\0\2\5\62\15\22\5\12\0\12\20\6\0"+
+    "\22\5\6\15\3\0\1\15\1\0\1\15\2\0\12\20\34\15\10\5"+
+    "\2\0\27\15\15\5\14\0\35\17\3\0\4\5\57\15\16\5\16\0"+
+    "\1\15\12\20\6\0\5\34\1\35\12\34\12\20\5\34\1\0\51\15"+
+    "\16\5\11\0\3\15\1\5\10\15\2\5\2\0\12\20\6\0\33\34"+
+    "\3\35\62\34\1\35\1\34\3\35\2\34\2\35\5\34\2\35\1\34"+
+    "\1\35\1\34\30\0\5\34\13\15\5\5\2\0\3\15\2\5\12\0"+
+    "\6\15\2\0\6\15\2\0\6\15\11\0\7\15\1\0\7\15\1\0"+
+    "\53\15\1\0\12\15\12\0\163\15\10\5\1\0\2\5\2\0\12\20"+
+    "\6\0\u2ba4\17\14\0\27\17\4\0\61\17\u2104\0\u016e\27\2\0\152\27"+
+    "\46\0\7\15\14\0\5\15\5\0\1\33\1\5\12\33\1\0\15\33"+
+    "\1\0\5\33\1\0\1\33\1\0\2\33\1\0\2\33\1\0\12\33"+
+    "\142\15\41\0\u016b\15\22\0\100\15\2\0\66\15\50\0\14\15\4\0"+
+    "\16\5\1\6\1\11\1\24\2\0\1\23\1\24\13\0\20\5\3\0"+
+    "\2\26\30\0\3\26\1\24\1\0\1\25\1\0\1\24\1\23\32\0"+
+    "\5\15\1\0\207\15\2\0\1\5\7\0\1\25\4\0\1\24\1\0"+
+    "\1\25\1\0\12\20\1\23\1\24\5\0\32\15\4\0\1\26\1\0"+
+    "\32\15\13\0\70\22\2\5\37\17\3\0\6\17\2\0\6\17\2\0"+
+    "\6\17\2\0\3\17\34\0\3\5\4\0\14\15\1\0\32\15\1\0"+
+    "\23\15\1\0\2\15\1\0\17\15\2\0\16\15\42\0\173\15\105\0"+
+    "\65\15\210\0\1\5\202\0\35\15\3\0\61\15\17\0\1\5\37\0"+
+    "\40\15\20\0\33\15\5\0\46\15\5\5\5\0\36\15\2\0\44\15"+
+    "\4\0\10\15\1\0\5\15\52\0\236\15\2\0\12\20\6\0\44\15"+
+    "\4\0\44\15\4\0\50\15\10\0\64\15\234\0\u0137\15\11\0\26\15"+
+    "\12\0\10\15\230\0\6\15\2\0\1\15\1\0\54\15\1\0\2\15"+
+    "\3\0\1\15\2\0\27\15\12\0\27\15\11\0\37\15\101\0\23\15"+
+    "\1\0\2\15\12\0\26\15\12\0\32\15\106\0\70\15\6\0\2\15"+
+    "\100\0\1\15\3\5\1\0\2\5\5\0\4\5\4\15\1\0\3\15"+
+    "\1\0\33\15\4\0\3\5\4\0\1\5\40\0\35\15\3\0\35\15"+
+    "\43\0\10\15\1\0\34\15\2\5\31\0\66\15\12\0\26\15\12\0"+
+    "\23\15\15\0\22\15\156\0\111\15\67\0\63\15\15\0\63\15\u030d\0"+
+    "\3\5\65\15\17\5\37\0\12\20\17\0\4\5\55\15\13\5\2\0"+
+    "\1\5\22\0\31\15\7\0\12\20\6\0\3\5\44\15\16\5\1\0"+
+    "\12\20\20\0\43\15\1\5\2\0\1\15\11\0\3\5\60\15\16\5"+
+    "\4\15\5\0\3\5\3\0\12\20\1\15\1\0\1\15\43\0\22\15"+
+    "\1\0\31\15\14\5\6\0\1\5\101\0\7\15\1\0\1\15\1\0"+
+    "\4\15\1\0\17\15\1\0\12\15\7\0\57\15\14\5\5\0\12\20"+
+    "\6\0\4\5\1\0\10\15\2\0\2\15\2\0\26\15\1\0\7\15"+
+    "\1\0\2\15\1\0\5\15\2\0\1\5\1\15\7\5\2\0\2\5"+
+    "\2\0\3\5\2\0\1\15\6\0\1\5\5\0\5\15\2\5\2\0"+
+    "\7\5\3\0\5\5\213\0\65\15\22\5\4\15\5\0\12\20\46\0"+
+    "\60\15\24\5\2\15\1\0\1\15\10\0\12\20\246\0\57\15\7\5"+
+    "\2\0\11\5\27\0\4\15\2\5\42\0\60\15\21\5\3\0\1\15"+
+    "\13\0\12\20\46\0\53\15\15\5\10\0\12\20\66\0\32\34\3\0"+
+    "\17\35\4\0\12\20\2\34\3\0\1\34\u0160\0\100\15\12\20\25\0"+
+    "\1\15\u01c0\0\71\15\u0107\0\11\15\1\0\45\15\10\5\1\0\10\5"+
+    "\1\15\17\0\12\20\30\0\36\15\2\0\26\5\1\0\16\5\u0349\0"+
+    "\u039a\15\146\0\157\15\21\0\304\15\u0abc\0\u042f\15\u0fd1\0\u0247\15\u21b9\0"+
+    "\u0239\15\7\0\37\15\1\0\12\20\146\0\36\15\2\0\5\5\13\0"+
+    "\60\15\7\5\11\0\4\15\14\0\12\20\11\0\25\15\5\0\23\15"+
+    "\u0370\0\105\15\13\0\1\15\56\5\20\0\4\5\15\15\100\0\1\15"+
+    "\u401f\0\1\22\1\30\u0bfe\0\153\15\5\0\15\15\3\0\11\15\7\0"+
+    "\12\15\3\0\2\5\1\0\4\5\u14c1\0\5\5\3\0\26\5\2\0"+
+    "\7\5\36\0\4\5\224\0\3\5\u01bb\0\125\15\1\0\107\15\1\0"+
+    "\2\15\2\0\1\15\2\0\2\15\2\0\4\15\1\0\14\15\1\0"+
+    "\1\15\1\0\7\15\1\0\101\15\1\0\4\15\2\0\10\15\1\0"+
+    "\7\15\1\0\34\15\1\0\4\15\1\0\5\15\1\0\1\15\3\0"+
+    "\7\15\1\0\u0154\15\2\0\31\15\1\0\31\15\1\0\37\15\1\0"+
+    "\31\15\1\0\37\15\1\0\31\15\1\0\37\15\1\0\31\15\1\0"+
+    "\37\15\1\0\31\15\1\0\10\15\2\0\62\20\u0200\0\67\5\4\0"+
+    "\62\5\10\0\1\5\16\0\1\5\26\0\5\5\1\0\17\5\u0550\0"+
+    "\7\5\1\0\21\5\2\0\7\5\1\0\2\5\1\0\5\5\u07d5\0"+
+    "\305\15\13\0\7\5\51\0\104\15\7\5\5\0\12\20\u04a6\0\4\15"+
+    "\1\0\33\15\1\0\2\15\1\0\1\15\2\0\1\15\1\0\12\15"+
+    "\1\0\4\15\1\0\1\15\1\0\1\15\6\0\1\15\4\0\1\15"+
+    "\1\0\1\15\1\0\1\15\1\0\3\15\1\0\2\15\1\0\1\15"+
+    "\2\0\1\15\1\0\1\15\1\0\1\15\1\0\1\15\1\0\1\15"+
+    "\1\0\2\15\1\0\1\15\2\0\4\15\1\0\7\15\1\0\4\15"+
+    "\1\0\4\15\1\0\1\15\1\0\12\15\1\0\21\15\5\0\3\15"+
+    "\1\0\5\15\1\0\21\15\u0144\0\4\4\1\4\312\4\1\4\60\4"+
+    "\15\0\3\4\37\0\1\4\32\15\6\0\32\15\2\0\4\4\2\16"+
+    "\14\15\2\16\12\15\4\0\1\4\2\0\12\4\22\0\71\4\32\1"+
+    "\1\30\2\4\15\4\12\0\1\4\24\0\1\4\2\0\11\4\1\0"+
+    "\4\4\11\0\7\4\2\4\256\4\42\4\2\4\141\4\1\3\16\4"+
+    "\2\4\2\4\1\4\3\4\2\4\44\4\3\3\2\4\1\3\2\4"+
+    "\3\3\44\4\2\4\3\4\1\4\4\4\5\2\102\4\2\3\2\4"+
+    "\13\3\25\4\4\3\4\4\1\3\1\4\11\3\3\4\1\3\4\4"+
+    "\3\3\1\4\3\3\42\4\1\3\123\4\1\4\77\4\10\0\3\4"+
+    "\6\4\1\4\30\4\7\4\2\4\2\4\1\4\2\3\4\4\1\3"+
+    "\14\4\1\4\2\4\4\4\2\4\1\3\4\4\2\3\15\4\2\4"+
+    "\2\4\1\4\10\4\2\4\11\4\1\4\5\4\3\4\14\4\3\4"+
+    "\10\4\3\4\2\4\1\4\1\4\1\4\4\4\1\4\6\4\1\4"+
+    "\3\4\1\4\6\4\113\4\3\3\3\4\5\3\60\0\43\4\1\3"+
+    "\20\4\3\3\11\4\1\3\5\4\5\4\1\4\1\3\6\4\15\4"+
+    "\6\4\3\4\1\4\1\4\2\4\3\4\1\4\2\4\7\4\6\4"+
+    "\164\0\14\4\125\0\53\4\14\0\4\4\70\0\10\4\12\0\6\4"+
+    "\50\0\10\4\36\0\122\4\14\0\4\4\10\4\5\3\1\4\2\3"+
+    "\6\4\1\3\11\4\12\3\1\4\1\0\1\4\2\3\1\4\6\4"+
+    "\1\0\52\4\2\4\4\4\3\4\1\4\1\4\47\4\15\4\5\4"+
+    "\2\3\1\4\2\3\6\4\3\4\15\4\1\4\15\3\42\4\u05fe\4"+
+    "\2\0\ua6d7\27\51\0\u1035\27\13\0\336\27\2\0\u1682\27\u295e\0\u021e\27"+
+    "\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
+    "\1\5\36\0\137\13\1\14\200\0\360\5\uffff\0\uffff\0\ufe12\0";
 
   /** 
    * Translates characters to character classes
@@ -218,12 +284,15 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
-    "\1\7\1\2\1\1\1\10\1\2\1\0\1\2\1\0"+
-    "\1\4\1\0\2\2\2\0\1\1\1\0";
+    "\1\0\2\1\3\2\2\1\1\3\1\2\1\4\2\5"+
+    "\1\6\1\1\1\7\1\10\1\3\1\11\1\2\1\0"+
+    "\4\2\1\0\1\2\2\0\1\3\1\0\1\3\2\2"+
+    "\1\0\1\5\1\2\1\5\1\0\2\3\1\0\2\2"+
+    "\2\0\1\2\1\0\2\3\5\2\1\0\1\2\1\3"+
+    "\3\2";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -248,12 +317,17 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\22\0\44\0\66\0\110\0\132\0\154\0\176"+
-    "\0\220\0\242\0\264\0\306\0\330\0\352\0\374\0\u010e"+
-    "\0\u0120\0\154\0\u0132\0\u0144\0\u0156\0\264\0\u0168\0\u017a";
+    "\0\0\0\36\0\74\0\132\0\170\0\226\0\264\0\322"+
+    "\0\360\0\u010e\0\u012c\0\u014a\0\u0168\0\u0186\0\u01a4\0\u01c2"+
+    "\0\u01e0\0\u01fe\0\u021c\0\u023a\0\74\0\u0258\0\u0276\0\u0294"+
+    "\0\u02b2\0\264\0\u02d0\0\u02ee\0\322\0\u030c\0\u032a\0\u0348"+
+    "\0\u0366\0\u0384\0\u03a2\0\u03c0\0\u03de\0\u03fc\0\u01a4\0\u041a"+
+    "\0\u0438\0\u0456\0\u0474\0\u0492\0\u04b0\0\u04ce\0\u04ec\0\u050a"+
+    "\0\u0528\0\u0546\0\u0564\0\u0582\0\u05a0\0\u05be\0\u05dc\0\u05fa"+
+    "\0\36\0\u0618\0\360\0\u0636\0\u0654";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -276,33 +350,94 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_TRANS = zzUnpackTrans();
 
   private static final String ZZ_TRANS_PACKED_0 =
-    "\1\2\1\3\1\4\1\2\1\5\1\6\3\2\1\7"+
-    "\1\10\1\11\2\2\1\12\1\13\2\14\23\0\3\3"+
-    "\1\15\1\0\1\16\1\0\1\16\1\17\2\0\1\16"+
-    "\1\0\1\12\2\0\1\3\1\0\1\3\2\4\1\15"+
-    "\1\0\1\16\1\0\1\16\1\17\2\0\1\16\1\0"+
-    "\1\12\2\0\1\4\1\0\2\3\2\5\2\0\2\20"+
-    "\1\21\2\0\1\20\1\0\1\12\2\0\1\5\3\0"+
-    "\1\6\1\0\1\6\3\0\1\17\7\0\1\6\1\0"+
-    "\2\3\1\22\1\5\1\23\3\0\1\22\4\0\1\12"+
-    "\2\0\1\22\3\0\1\10\15\0\1\10\3\0\1\11"+
-    "\15\0\1\11\1\0\2\3\1\12\1\15\1\0\1\16"+
-    "\1\0\1\16\1\17\2\0\1\24\1\25\1\12\2\0"+
-    "\1\12\3\0\1\26\13\0\1\27\1\0\1\26\3\0"+
-    "\1\14\14\0\2\14\1\0\2\3\2\15\2\0\2\30"+
-    "\1\17\2\0\1\30\1\0\1\12\2\0\1\15\1\0"+
-    "\2\3\1\16\12\0\1\3\2\0\1\16\1\0\2\3"+
-    "\1\17\1\15\1\23\3\0\1\17\4\0\1\12\2\0"+
-    "\1\17\3\0\1\20\1\5\14\0\1\20\1\0\2\3"+
-    "\1\21\1\5\1\23\3\0\1\21\4\0\1\12\2\0"+
-    "\1\21\3\0\1\23\1\0\1\23\3\0\1\17\7\0"+
-    "\1\23\1\0\2\3\1\24\1\15\4\0\1\17\4\0"+
-    "\1\12\2\0\1\24\3\0\1\25\12\0\1\24\2\0"+
-    "\1\25\3\0\1\27\13\0\1\27\1\0\1\27\3\0"+
-    "\1\30\1\15\14\0\1\30";
+    "\1\2\1\3\1\4\1\5\1\6\2\2\1\7\2\2"+
+    "\1\10\2\2\1\11\1\12\1\13\1\14\1\15\1\16"+
+    "\3\2\1\17\1\20\1\21\2\2\1\22\2\23\37\0"+
+    "\1\24\3\0\2\25\1\0\5\25\20\0\1\25\5\0"+
+    "\1\4\2\0\1\4\1\0\1\26\2\4\20\0\1\4"+
+    "\2\0\1\4\2\0\1\5\2\0\1\5\1\27\1\30"+
+    "\2\5\20\0\1\5\5\0\1\6\2\0\1\6\1\27"+
+    "\1\31\2\6\20\0\1\6\5\0\1\32\2\0\1\33"+
+    "\1\34\3\32\20\0\1\32\3\0\1\5\1\6\5\0"+
+    "\1\35\3\0\1\6\24\0\2\11\1\0\10\11\2\36"+
+    "\1\0\1\37\1\0\1\37\1\40\2\0\1\37\1\0"+
+    "\1\22\1\0\1\11\5\0\1\12\1\11\1\0\1\12"+
+    "\1\41\1\42\2\12\3\11\2\36\1\0\1\37\1\0"+
+    "\1\37\1\40\2\0\1\37\1\0\1\22\1\0\1\12"+
+    "\5\0\2\13\1\0\5\13\2\11\1\13\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\13\5\0\2\14\1\0\5\14\3\11\2\14"+
+    "\2\0\2\43\1\44\2\0\1\43\1\0\1\22\1\0"+
+    "\1\14\5\0\1\15\1\14\1\0\1\45\1\46\3\15"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\15\5\0\2\16\1\0\5\16\5\0"+
+    "\1\16\3\0\1\40\6\0\1\16\5\0\2\47\1\0"+
+    "\5\47\3\11\2\14\1\50\3\0\1\47\4\0\1\22"+
+    "\1\0\1\47\5\0\2\20\1\0\5\20\20\0\1\20"+
+    "\5\0\2\21\1\0\5\21\20\0\1\21\5\0\2\22"+
+    "\1\0\5\22\3\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\51\1\52\1\22\1\0\1\22\5\0"+
+    "\2\23\1\0\5\23\17\0\2\23\5\0\2\24\1\0"+
+    "\5\24\20\0\1\24\2\0\1\4\1\53\1\54\1\4"+
+    "\2\0\1\4\1\0\1\26\2\4\1\0\1\54\16\0"+
+    "\1\4\12\0\1\55\1\56\24\0\1\4\1\53\1\54"+
+    "\1\5\2\0\1\5\1\27\1\30\2\5\1\0\1\54"+
+    "\16\0\1\5\2\0\1\4\1\53\1\54\1\6\2\0"+
+    "\1\6\1\27\1\31\2\6\1\0\1\54\16\0\1\6"+
+    "\5\0\1\33\2\0\1\33\1\34\3\33\20\0\1\33"+
+    "\10\0\1\57\32\0\2\36\1\0\5\36\3\11\2\36"+
+    "\2\0\2\60\1\40\2\0\1\60\1\0\1\22\1\0"+
+    "\1\36\5\0\2\37\1\0\5\37\3\11\13\0\1\11"+
+    "\1\0\1\37\5\0\2\40\1\0\5\40\3\11\2\36"+
+    "\1\50\3\0\1\40\4\0\1\22\1\0\1\40\5\0"+
+    "\2\11\1\0\2\11\1\61\1\62\4\11\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\11\2\0\1\4\1\53\1\54\1\12\1\11"+
+    "\1\0\1\12\1\41\1\42\2\12\1\11\1\63\1\11"+
+    "\2\36\1\0\1\37\1\0\1\37\1\40\2\0\1\37"+
+    "\1\0\1\22\1\0\1\12\5\0\2\43\1\0\5\43"+
+    "\3\0\2\14\13\0\1\43\5\0\2\44\1\0\5\44"+
+    "\3\11\2\14\1\50\3\0\1\44\4\0\1\22\1\0"+
+    "\1\44\5\0\1\45\1\14\1\0\1\45\1\46\3\45"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\45\5\0\2\14\1\0\1\64\4\14"+
+    "\3\11\2\14\2\0\2\43\1\44\2\0\1\43\1\0"+
+    "\1\22\1\0\1\14\5\0\2\50\1\0\5\50\5\0"+
+    "\1\50\3\0\1\40\6\0\1\50\5\0\2\51\1\0"+
+    "\5\51\3\11\2\36\4\0\1\40\4\0\1\22\1\0"+
+    "\1\51\5\0\2\52\1\0\5\52\16\0\1\51\1\0"+
+    "\1\52\2\0\1\4\2\0\1\53\2\0\1\53\1\65"+
+    "\1\66\2\53\20\0\1\53\5\0\1\54\2\0\1\54"+
+    "\1\65\1\67\2\54\20\0\1\54\2\0\1\4\1\53"+
+    "\1\54\5\0\1\70\3\0\1\54\32\0\1\56\1\71"+
+    "\26\0\1\57\2\0\1\57\1\0\3\57\20\0\1\57"+
+    "\5\0\2\60\1\0\5\60\3\0\2\36\13\0\1\60"+
+    "\2\0\1\4\1\53\1\54\2\11\1\0\2\11\1\72"+
+    "\3\11\1\63\1\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\11\5\0"+
+    "\2\11\1\0\3\11\1\62\1\73\3\11\2\36\1\0"+
+    "\1\37\1\0\1\37\1\40\2\0\1\37\1\0\1\22"+
+    "\1\0\1\11\5\0\1\63\1\11\1\0\1\63\1\74"+
+    "\1\75\2\63\3\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\63\5\0"+
+    "\1\64\1\14\1\0\1\64\1\14\3\64\3\11\2\14"+
+    "\2\0\2\43\1\44\2\0\1\43\1\0\1\22\1\0"+
+    "\1\64\12\0\1\55\25\0\1\4\1\53\1\54\1\53"+
+    "\2\0\1\53\1\65\1\66\2\53\1\0\1\54\16\0"+
+    "\1\53\2\0\1\4\1\53\2\54\2\0\1\54\1\65"+
+    "\1\67\2\54\1\0\1\54\16\0\1\54\3\0\1\53"+
+    "\1\54\5\0\1\70\3\0\1\54\22\0\1\53\1\54"+
+    "\2\11\1\0\2\11\1\72\3\11\1\63\1\11\2\36"+
+    "\1\0\1\37\1\0\1\37\1\40\2\0\1\37\1\0"+
+    "\1\22\1\0\1\11\5\0\2\11\1\0\2\11\1\61"+
+    "\5\11\2\36\1\0\1\37\1\0\1\37\1\40\2\0"+
+    "\1\37\1\0\1\22\1\0\1\11\2\0\1\4\1\53"+
+    "\1\54\1\63\1\11\1\0\1\63\1\74\1\75\2\63"+
+    "\1\11\1\63\1\11\2\36\1\0\1\37\1\0\1\37"+
+    "\1\40\2\0\1\37\1\0\1\22\1\0\1\63";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[396];
+    int [] result = new int[1650];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -329,7 +464,7 @@ public final class StandardTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -340,11 +475,12 @@ public final class StandardTokenizerImpl {
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\1\0\1\11\13\1\1\0\1\1\1\0\1\1\1\0"+
-    "\2\1\2\0\1\1\1\0";
+    "\1\0\1\11\22\1\1\0\4\1\1\0\1\1\2\0"+
+    "\1\1\1\0\3\1\1\0\3\1\1\0\2\1\1\0"+
+    "\2\1\2\0\1\1\1\0\7\1\1\0\1\11\4\1";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[24];
+    int [] result = new int[61];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;
@@ -401,11 +537,11 @@ public final class StandardTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -447,6 +583,9 @@ public final class StandardTokenizerImpl {
 
   /** Hangul token type */
   public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+  
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = StandardTokenizer.EMOJI;
 
   /** Character count processed so far */
   public final int yychar()
@@ -492,7 +631,7 @@ public final class StandardTokenizerImpl {
     char [] map = new char[0x110000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2836) {
+    while (i < 4122) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -500,6 +639,8 @@ public final class StandardTokenizerImpl {
     return map;
   }
 
+/* -------------------------------------------------------------------------------- */
+/* Begin Lucene-specific disable-buffer-expansion modifications to skeleton.default */
 
   /**
    * Refills the input buffer.
@@ -527,32 +668,45 @@ public final class StandardTokenizerImpl {
 
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
+    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
+    if (requested == 0) {
+      return true;
     }
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
-        if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+        if (numRead == requested) { // We might have requested too few chars to encode a full Unicode character.
           --zzEndRead;
           zzFinalHighSurrogate = 1;
-          if (totalRead == 1) { return true; }
+          if (numRead == 1) {
+            return true;
+          }
+        } else {                    // There is room in the buffer for at least one more char
+          int c = zzReader.read();  // Expecting to read a low surrogate char
+          if (c == -1) {
+            return true;
+          } else {
+            zzBuffer[zzEndRead++] = (char)c;
+            return false;
+          }
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
+/* End Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+/* ------------------------------------------------------------------------------ */
     
   /**
    * Closes the input stream.
@@ -773,49 +927,62 @@ public final class StandardTokenizerImpl {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
-          }
-        case 9: break;
-        case 2: 
-          { return WORD_TYPE;
-          }
-        case 10: break;
-        case 3: 
-          { return HANGUL_TYPE;
-          }
-        case 11: break;
-        case 4: 
-          { return NUMERIC_TYPE;
-          }
-        case 12: break;
-        case 5: 
-          { return KATAKANA_TYPE;
-          }
-        case 13: break;
-        case 6: 
-          { return IDEOGRAPHIC_TYPE;
-          }
-        case 14: break;
-        case 7: 
-          { return HIRAGANA_TYPE;
-          }
-        case 15: break;
-        case 8: 
-          { return SOUTH_EAST_ASIAN_TYPE;
-          }
-        case 16: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
               {
                 return YYEOF;
               }
-          } 
-          else {
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */
+            } 
+            // fall through
+          case 10: break;
+          case 2: 
+            { return EMOJI_TYPE;
+            } 
+            // fall through
+          case 11: break;
+          case 3: 
+            { return WORD_TYPE;
+            } 
+            // fall through
+          case 12: break;
+          case 4: 
+            { return HANGUL_TYPE;
+            } 
+            // fall through
+          case 13: break;
+          case 5: 
+            { return NUMERIC_TYPE;
+            } 
+            // fall through
+          case 14: break;
+          case 6: 
+            { return KATAKANA_TYPE;
+            } 
+            // fall through
+          case 15: break;
+          case 7: 
+            { return IDEOGRAPHIC_TYPE;
+            } 
+            // fall through
+          case 16: break;
+          case 8: 
+            { return HIRAGANA_TYPE;
+            } 
+            // fall through
+          case 17: break;
+          case 9: 
+            { return SOUTH_EAST_ASIAN_TYPE;
+            } 
+            // fall through
+          case 18: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
index a1e7b17..e95a9b4 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
@@ -34,12 +34,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %integer
 %final
 %public
@@ -48,22 +49,67 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %char
 %buffer 255
 
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx   = [\p{WB:HebrewLetter}\p{WB:ALetter}]                       [\p{WB:Format}\p{WB:Extend}]*
-NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx          = \p{WB:Katakana}                                           [\p{WB:Format}\p{WB:Extend}]* 
-MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      [\p{WB:Format}\p{WB:Extend}]* 
-MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       [\p{WB:Format}\p{WB:Extend}]*
-HanEx               = \p{Script:Han}                                            [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx          = \p{Script:Hiragana}                                       [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx       = \p{WB:Single_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx       = \p{WB:Double_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator}                                  [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx    = \p{LB:Complex_Context}                                    [\p{WB:Format}\p{WB:Extend}]*
+
+//////////////////////////////////////////////////////////////////////////
+// Begin Emoji Macros - see documentation below, near the EMOJI_TYPE rule
+
+// TODO: Remove this include file when JFlex supports these properties directly (in Unicode 11.0+)
+%include ../../../../../../data/jflex/UnicodeEmojiProperties.jflex
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+//   \uFE0E (Text Presentation Selector) and \uFE0F (Emoji Presentation Selector) - included in \p{WB:Extend}
+//   - are explicitly excluded here so that we can properly handle Emoji sequences.
+//
+ExtFmtZwjSansPresSel = [[\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]--[\uFE0E\uFE0F]]*
+
+KeyCapBaseChar = [0-9#*]
+KeyCapBaseCharEx = {KeyCapBaseChar} {ExtFmtZwjSansPresSel}
+KeyCap = \u20E3
+KeyCapEx = {KeyCap} {ExtFmtZwjSansPresSel}
+
+// © = COPYRIGHT SIGN; ® = REGISTERED SIGN; ™ = TRADE MARK SIGN; \u3030 = WAVY DASH; \u303D = PART ALTERNATION MARK
+AccidentalEmoji = [©®™\u3030\u303D]
+EmojiRKAM = ( \p{WB:Regional_Indicator} | {KeyCapBaseChar} | {AccidentalEmoji} | {Emoji_Modifier} )
+
+// Unlike Unicode properties, macros are not allowed in character classes, so we achieve set difference
+// by applying DeMorgan: the expression that matches everything of 'a' not matched by 'b' is: !(!a|b)
+// TODO: Convert this expression to character class difference when JFlex supports the properties directly (in Unicode 11.0+)
+EmojiSansRKAM = !( ! {Emoji} | {EmojiRKAM} )
+
+EmojiChar = ( {Extended_Pictographic} | {EmojiSansRKAM} )
+
+EmojiCharEx         = {EmojiChar}           {ExtFmtZwjSansPresSel}
+EmojiModifierBaseEx = {Emoji_Modifier_Base} {ExtFmtZwjSansPresSel}
+EmojiModifierEx     = {Emoji_Modifier}      {ExtFmtZwjSansPresSel}
+
+EmojiPresentationSelector = \uFE0F
+EmojiCharOrPresSeqOrModSeq = ( \p{WB:ZWJ}* {EmojiCharEx} {EmojiPresentationSelector}? ) | ( ( \p{WB:ZWJ}* {EmojiModifierBaseEx} )? {EmojiModifierEx} )
+TagSpec = [\u{E0020}-\u{E007E}]
+TagTerm = \u{E007F}
+
+// End Emoji Macros
+//////////////////////////////////////////////////////////////////////////
+
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] {ExtFmtZwj}
+AHLetterEx          = [\p{WB:ALetter}\p{WB:Hebrew_Letter}]                      {ExtFmtZwj}
+NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        {ExtFmtZwj}
+KatakanaEx          = \p{WB:Katakana}                                           {ExtFmtZwj} 
+MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      {ExtFmtZwj} 
+MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         {ExtFmtZwj}
+ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       {ExtFmtZwj}
+HanEx               = \p{Script:Han}                                            {ExtFmtZwj}
+HiraganaEx          = \p{Script:Hiragana}                                       {ExtFmtZwj}
+SingleQuoteEx       = \p{WB:Single_Quote}                                       {ExtFmtZwj}
+DoubleQuoteEx       = \p{WB:Double_Quote}                                       {ExtFmtZwj}
+HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      {ExtFmtZwj}
+RegionalIndicatorEx = \p{WB:Regional_Indicator}                                 {ExtFmtZwj}
+ComplexContextEx    = \p{LB:Complex_Context}                                    {ExtFmtZwj}
 
 %{
   /** Alphanumeric sequences */
@@ -93,6 +139,9 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
   /** Hangul token type */
   public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+  
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = StandardTokenizer.EMOJI;
 
   /** Character count processed so far */
   public final int yychar()
@@ -120,18 +169,64 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
 %%
 
-// UAX#29 WB1.   sot   ÷
-//        WB2.     ÷   eot
+// UAX#29 WB1.    sot ÷ Any
+//        WB2.    Any ÷ eot
 //
 <<EOF>> { return YYEOF; }
 
-// UAX#29 WB8.   Numeric × Numeric
-//        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-//        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
+// Instead of these: UAX#29 WB3c. ZWJ × (Glue_After_Zwj | EBG)
+//                          WB14. (E_Base | EBG) × E_Modifier
+//                          WB15. ^ (RI RI)* RI × RI
+//                          WB16. [^RI] (RI RI)* RI × RI
+//
+// We use the "emoji_sequence" rule from http://www.unicode.org/reports/tr51/tr51-14.html (Unicode 11.0)
+// and the Emoji data from http://unicode.org/Public/emoji/11.0/emoji-data.txt (in included file UnicodeEmojiProperties.jflex)
+// 
+// emoji_sequence :=
+//    Top-level EBNF           Expanded #1                       Expanded #2                       Expanded #3
+//    ---------------------    ----------------------------      -----------------------------     ----------------------------------------------
+//      emoji_core_sequence      emoji_combining_sequence          emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_keycap_sequence           | [0-9#*] \u{FE0F 20E3}      [1]
+//                             | emoji_modifier_sequence                                           | \p{Emoji_Modifier_Base} \p{Emoji_Modifier}
+//                             | emoji_flag_sequence                                               | \p{WB:Regional_Indicator}{2}               )
+//
+//    | emoji_zwj_sequence       emoji_zwj_element                 emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+//                             ( ZWJ emoji_zwj_element )+                                          ( \p{WB:ZWJ} ^^ )+
+// 
+//    | emoji_tag_sequence     tag_base                            emoji_character                 ( \p{Emoji}
+//                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+//                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+//                             tag_spec                                                            [\u{E0020}-\u{E007E}]+
+//                             tag_term                                                            \u{E007F}
+//
+// [1] https://unicode.org/Public/emoji/11.0/emoji-test.txt includes key cap sequences 
+//     WITHOUT \uFE0F (emoji presentation selector), annotating them as "non-fully-qualified";
+//     TR#51 says about non-fully-qualified *ZWJ sequences* that implementations may
+//     choose whether to support them for segmentation.  This implementation will
+//     recognize /[0-9#*]\u20E3/ - i.e. without \uFE0F - as Emoji. 
+//
+// See also: http://www.unicode.org/L2/L2016/16315-handling-seg-emoji.pdf
+//           https://docs.google.com/document/d/1yDZ5TUZNVVKaM9zYCCLbRIAKGNZANsAGl0bcNzGGvn8
+//
+//     In particular, the above docs recommend a modified UAX#29 WB3c rule (covered by TR#51's "emoji_zwj_sequence"):
+//
+//         WB3c′ ZWJ × (Extended_Pictographic | EmojiNRK)
+//
+  {EmojiCharOrPresSeqOrModSeq} ( ( \p{WB:ZWJ} {EmojiCharOrPresSeqOrModSeq} )* | {TagSpec}+ {TagTerm} ) 
+| {KeyCapBaseCharEx} {EmojiPresentationSelector}? {KeyCapEx} 
+| {RegionalIndicatorEx}{2} 
+  { return EMOJI_TYPE; }
+
+// UAX#29 WB8.    Numeric × Numeric
+//        WB11.   Numeric (MidNum | MidNumLetQ) × Numeric
+//        WB12.   Numeric × (MidNum | MidNumLetQ) Numeric
+//        WB13a.  (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b.  ExtendNumLet × (AHLetter | Numeric | Katakana)
 //
-{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}* 
+{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
   { return NUMERIC_TYPE; }
 
 // subset of the below for typing purposes only!
@@ -141,28 +236,28 @@ ComplexContextEx    = \p{LB:Complex_Context}
 {KatakanaEx}+
   { return KATAKANA_TYPE; }
 
-// UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-//        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-//        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-//        WB7a.  Hebrew_Letter × Single_Quote
-//        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
-//        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
-//        WB9.   (ALetter | Hebrew_Letter) × Numeric
-//        WB10.  Numeric × (ALetter | Hebrew_Letter)
-//        WB13.  Katakana × Katakana
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
-//
-{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+// UAX#29 WB5.    AHLetter × AHLetter
+//        WB6.    AHLetter × (MidLetter | MidNumLetQ) AHLetter
+//        WB7.    AHLetter (MidLetter | MidNumLetQ) × AHLetter
+//        WB7a.   Hebrew_Letter × Single_Quote
+//        WB7b.   Hebrew_Letter × Double_Quote Hebrew_Letter
+//        WB7c.   Hebrew_Letter Double_Quote × Hebrew_Letter
+//        WB9.    AHLetter × Numeric
+//        WB10.   Numeric × AHLetter
+//        WB13.   Katakana × Katakana
+//        WB13a.  (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b.  ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
+//
+{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                     | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                      )+
                    )
-({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                     | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                      )+
                    )
 )*
@@ -172,13 +267,13 @@ ComplexContextEx    = \p{LB:Complex_Context}
 
 // From UAX #29:
 //
-//    [C]haracters with the Line_Break property values of Contingent_Break (CB), 
-//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word 
+//    [C]haracters with the Line_Break property values of Contingent_Break (CB),
+//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
 //    boundary property values based on criteria outside of the scope of this
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 9.0, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -191,17 +286,14 @@ ComplexContextEx    = \p{LB:Complex_Context}
 //
 {ComplexContextEx}+ { return SOUTH_EAST_ASIAN_TYPE; }
 
-// UAX#29 WB14.  Any ÷ Any
+// UAX#29 WB999.  Any ÷ Any
 //
 {HanEx} { return IDEOGRAPHIC_TYPE; }
 {HiraganaEx} { return HIRAGANA_TYPE; }
 
-
-// UAX#29 WB3.   CR × LF
-//        WB3a.  (Newline | CR | LF) ÷
-//        WB3b.  ÷ (Newline | CR | LF)
-//        WB13c. Regional_Indicator × Regional_Indicator
-//        WB14.  Any ÷ Any
+// UAX#29 WB3.    CR × LF
+//        WB3a.   (Newline | CR | LF) ÷
+//        WB3b.   ÷ (Newline | CR | LF)
+//        WB999.  Any ÷ Any
 //
-{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
-  { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */ }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
index 6abbc2b..615b565 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
@@ -18,8 +18,11 @@ package org.apache.lucene.analysis.standard;
 
 
 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -27,6 +30,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TestUtil;
 
@@ -282,7 +286,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   }
   
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
+    WordBreakTestUnicode_9_0_0 wordBreakTest = new WordBreakTestUnicode_9_0_0();
     wordBreakTest.test(a);
   }
   
@@ -358,8 +362,80 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1.,2", new String[] { "3_1", "2" });
   }
 
-
-
+  /** simple emoji: U+1F4A9 (PILE OF POO) is its own EMOJI-typed token, whether space-separated or adjacent */
+  public void testEmoji() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩 💩💩",
+        new String[] { "💩", "💩", "💩" },
+        new String[] { "<EMOJI>", "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence: WOMAN (U+1F469), ZWJ, HEAVY BLACK HEART (U+2764) + U+FE0F, ZWJ, WOMAN - one EMOJI token */
+  public void testEmojiSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\uD83D\uDC69\u200D\u2764\uFE0F\u200D\uD83D\uDC69",
+        new String[] { "\uD83D\uDC69\u200D\u2764\uFE0F\u200D\uD83D\uDC69" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** emoji zwj sequence with fitzpatrick modifier: MAN (U+1F468), U+1F3FC, ZWJ, STAFF OF AESCULAPIUS (U+2695) + U+FE0F */
+  public void testEmojiSequenceWithModifier() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695\uFE0F",
+        new String[] { "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695\uFE0F" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** regional indicator: two adjacent flags (each U+1F1FA U+1F1F8) split into one EMOJI token per indicator pair */
+  public void testEmojiRegionalIndicator() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🇺🇸🇺🇸",
+        new String[] { "🇺🇸", "🇺🇸" },
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** variation sequences: keycaps with the emoji presentation selector (U+FE0F, then U+20E3), then text presentation (U+FE0E) */
+  public void testEmojiVariationSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0F\u20E3",
+        new String[] { "#\uFE0F\u20E3" },
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0F\u20E3",
+        new String[] { "3\uFE0F\u20E3",},
+        new String[] { "<EMOJI>" });
+
+    // text presentation sequences
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",
+        new String[] { },
+        new String[] { });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
+        new String[] { "3\uFE0E",},
+        new String[] { "<NUM>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
+        new String[] { "\u2B55",},
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
+        new String[] { "\u2B55", "\u200D\u2B55"},
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  public void testEmojiTagSequence() throws Exception { // U+1F3F4 WAVING BLACK FLAG + tag chars "gbeng" + U+E007F CANCEL TAG
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F",
+        new String[] { "\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F" },
+        new String[] { "<EMOJI>" });
+  }
+
+  public void testEmojiTokenization() throws Exception {
+    // simple emoji around latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "poo💩poo",
+        new String[] { "poo", "💩", "poo" },
+        new String[] { "<ALPHANUM>", "<EMOJI>", "<ALPHANUM>" });
+    // simple emoji around non-latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩中國💩",
+        new String[] { "💩", "中", "國", "💩" },
+        new String[] { "<EMOJI>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<EMOJI>" });
+  }
+  
+  public void testUnicodeEmojiTests() throws Exception {
+    // Run the EmojiTokenizationTestUnicode_11_0 cases against the analyzer under test
+    new EmojiTokenizationTestUnicode_11_0().test(a);
+  }
+  
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     Analyzer analyzer = new StandardAnalyzer();
@@ -416,4 +492,53 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "ab cd toolong xy z", new String[]{"ab", "cd", "toolo", "ng", "xy", "z"});
     a.close();
   }
+
+  public void testSplitSurrogatePairWithSpoonFeedReader() throws Exception {
+    String text = "12345678\ud800\udf00"; // U+D800 U+DF00 = U+10300 = ๐Œ€ (OLD ITALIC LETTER A)
+    
+    // Collect tokens with normal reader
+    StandardAnalyzer a = new StandardAnalyzer();
+    TokenStream ts = a.tokenStream("dummy", text);
+    List<String> tokens = new ArrayList<>();
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+    ts.reset();
+    while (ts.incrementToken()) {
+      tokens.add(termAtt.toString());
+    }
+    ts.end();
+    ts.close();
+
+    // Tokens from a spoon-feed reader should be the same as from a normal reader
+    // The 9th char is a high surrogate, so the 9-max-chars spoon-feed reader will split the surrogate pair at a read boundary
+    Reader reader = new SpoonFeedMaxCharsReaderWrapper(9, new StringReader(text));
+    ts = a.tokenStream("dummy", reader);
+    termAtt = ts.addAttribute(CharTermAttribute.class);
+    ts.reset();
+    for (int tokenNum = 0 ; ts.incrementToken() ; ++tokenNum) {
+      assertEquals("token #" + tokenNum + " mismatch: ", termAtt.toString(), tokens.get(tokenNum));
+    }
+    ts.end();
+    ts.close();
+  }
+}
+
+class SpoonFeedMaxCharsReaderWrapper extends Reader { // test helper: caps how many chars each read() call may request
+  private final int chunkLimit;  // upper bound on the length passed to the wrapped reader per read() call
+  private final Reader delegate; // the reader that actually supplies the chars
+
+  public SpoonFeedMaxCharsReaderWrapper(int maxChars, Reader in) {
+    this.chunkLimit = maxChars;
+    this.delegate = in;
+  }
+
+  /** Reads from the wrapped reader, requesting the smaller of {@code len} and the per-call limit; returns its result. */
+  @Override
+  public int read(char[] cbuf, int off, int len) throws IOException {
+    return delegate.read(cbuf, off, (len < chunkLimit) ? len : chunkLimit);
+  }
+
+  @Override
+  public void close() throws IOException {
+    delegate.close();
+  }
 }


[11/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
deleted file mode 100644
index 4a3731e..0000000
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
+++ /dev/null
@@ -1,5537 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
-
-/**
- * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
- * from: http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
- *
- * WordBreakTest.txt indicates the points in the provided character sequences
- * at which conforming implementations must and must not break words.  This
- * class tests for expected token extraction from each of the test sequences
- * in WordBreakTest.txt, where the expected tokens are those character
- * sequences bounded by word breaks and containing at least one character
- * from one of the following character sets:
- *
- *    \p{Script = Han}                (From http://www.unicode.org/Public/6.3.0/ucd/Scripts.txt)
- *    \p{Script = Hiragana}
- *    \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.3.0/ucd/LineBreak.txt)
- *    \p{WordBreak = ALetter}         (From http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt)
- *    \p{WordBreak = Hebrew_Letter}
- *    \p{WordBreak = Katakana}
- *    \p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
- *    [\uFF10-\uFF19]                (Full-width Arabic digits)
- */
-@Ignore
-public class WordBreakTestUnicode_6_3_0 extends BaseTokenStreamTestCase {
-
-  public void test(Analyzer analyzer) throws Exception {
-    // รท 0001 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0001",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0001 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
-                     new String[] {  });
-
-    // รท 0001 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\r",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000D รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\r",
-                     new String[] {  });
-
-    // รท 0001 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\n",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\n",
-                     new String[] {  });
-
-    // รท 0001 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u000B",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 000B รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
-                     new String[] {  });
-
-    // รท 0001 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 0001 ร— 0308 รท 3031 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 0001 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 0001 ร— 0308 รท 0041 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 0001 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u003A",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
-                     new String[] {  });
-
-    // รท 0001 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u002C",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
-                     new String[] {  });
-
-    // รท 0001 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u002E",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 002E รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
-                     new String[] {  });
-
-    // รท 0001 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 0001 ร— 0308 รท 0030 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 0001 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u005F",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 005F รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
-                     new String[] {  });
-
-    // รท 0001 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 1F1E6 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 0001 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 0001 ร— 0308 รท 05D0 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 0001 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\"",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0022 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\"",
-                     new String[] {  });
-
-    // รท 0001 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0027",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
-                     new String[] {  });
-
-    // รท 0001 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u00AD",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 ร— 00AD รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 0001 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0300",
-                     new String[] {  });
-
-    // รท 0001 ร— 0308 ร— 0300 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
-                     new String[] {  });
-
-    // รท 0001 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 0001 ร— 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 0001 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 0001 ร— 0308 รท 0061 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 0001 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 003A รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 0027 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 002C รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 0001 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 0001 ร— 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <START OF HEADING> (Other) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0001",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 0001 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0001",
-                     new String[] {  });
-
-    // รท 000D รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\r",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000D รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\r",
-                     new String[] {  });
-
-    // รท 000D ร— 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) ร— [3.0] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\n",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\n",
-                     new String[] {  });
-
-    // รท 000D รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u000B",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 000B รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u000B",
-                     new String[] {  });
-
-    // รท 000D รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000D รท 0308 รท 3031 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000D รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000D รท 0308 รท 0041 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000D รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u003A",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u003A",
-                     new String[] {  });
-
-    // รท 000D รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u002C",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u002C",
-                     new String[] {  });
-
-    // รท 000D รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u002E",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 002E รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u002E",
-                     new String[] {  });
-
-    // รท 000D รท 0030 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 000D รท 0308 รท 0030 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 000D รท 005F รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u005F",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 005F รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u005F",
-                     new String[] {  });
-
-    // รท 000D รท 1F1E6 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 1F1E6 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 000D รท 05D0 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 000D รท 0308 รท 05D0 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 000D รท 0022 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\"",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 0022 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\"",
-                     new String[] {  });
-
-    // รท 000D รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0027",
-                     new String[] {  });
-
-    // รท 000D รท 0308 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0027",
-                     new String[] {  });
-
-    // รท 000D รท 00AD รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u00AD",
-                     new String[] {  });
-
-    // รท 000D รท 0308 ร— 00AD รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 000D รท 0300 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0300",
-                     new String[] {  });
-
-    // รท 000D รท 0308 ร— 0300 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0300",
-                     new String[] {  });
-
-    // รท 000D รท 0061 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 000D รท 0308 รท 0061 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // รท 000D รท 0061 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0308 รท 0061 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0061 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0308 รท 0061 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0061 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0308 รท 0061 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // รท 000D รท 0031 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0308 รท 0031 รท 003A รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0031 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0308 รท 0031 รท 0027 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0031 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0308 รท 0031 รท 002C รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000D รท 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] <CARRIAGE RETURN (CR)> (CR) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // รท 000A รท 0001 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0001",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 0001 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0001",
-                     new String[] {  });
-
-    // รท 000A รท 000D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\r",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 000D รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\r",
-                     new String[] {  });
-
-    // รท 000A รท 000A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\n",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 000A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\n",
-                     new String[] {  });
-
-    // รท 000A รท 000B รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u000B",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 000B รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u000B",
-                     new String[] {  });
-
-    // รท 000A รท 3031 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000A รท 0308 รท 3031 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 000A รท 0041 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000A รท 0308 รท 0041 รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 000A รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u003A",
-                     new String[] {  });
-
-    // รท 000A รท 0308 รท 003A รท  #  รท [0.2] <LINE FEED (LF)> (LF) รท [3.1] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u003A",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u002C",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u002C",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u002E",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u002E",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000A ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000A ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u005F",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u005F",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000A ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000A ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\"",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\"",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0027",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0027",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u00AD",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0300",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0300",
-                     new String[] {  });
-
-    // ÷ 000A ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000A ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000A ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0001",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 0001 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\r",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 000D ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\r",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\n",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 000A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\n",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u000B",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 000B ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 000B ÷ 0308 ÷ 3031 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // ÷ 000B ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 000B ÷ 0308 ÷ 0041 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // ÷ 000B ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u003A",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u002C",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u002E",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 002E ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000B ÷ 0308 ÷ 0030 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // ÷ 000B ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u005F",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 005F ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000B ÷ 0308 ÷ 05D0 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // ÷ 000B ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\"",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 0022 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\"",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0027",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u00AD",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 × 00AD ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0300",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0308 × 0300 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
-                     new String[] {  });
-
-    // ÷ 000B ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
-                     new String[] { "\u0061\u2060" });
-
-    // ÷ 000B ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
-                     new String[] { "\u0061" });
-
-    // ÷ 000B ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷  #  ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0031" });
-
-    // ÷ 3031 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0001",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0001 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\r",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000D ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\r",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\n",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\n",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u000B",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 000B ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u3031",
-                     new String[] { "\u3031\u3031" });
-
-    // ÷ 3031 × 0308 × 3031 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
-                     new String[] { "\u3031\u0308\u3031" });
-
-    // ÷ 3031 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0041",
-                     new String[] { "\u3031", "\u0041" });
-
-    // ÷ 3031 × 0308 ÷ 0041 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
-                     new String[] { "\u3031\u0308", "\u0041" });
-
-    // ÷ 3031 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u003A",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 003A ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u002C",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 002C ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u002E",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 002E ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0030",
-                     new String[] { "\u3031", "\u0030" });
-
-    // ÷ 3031 × 0308 ÷ 0030 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
-                     new String[] { "\u3031\u0308", "\u0030" });
-
-    // ÷ 3031 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u005F",
-                     new String[] { "\u3031\u005F" });
-
-    // ÷ 3031 × 0308 × 005F ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
-                     new String[] { "\u3031\u0308\u005F" });
-
-    // ÷ 3031 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 1F1E6 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u05D0",
-                     new String[] { "\u3031", "\u05D0" });
-
-    // ÷ 3031 × 0308 ÷ 05D0 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
-                     new String[] { "\u3031\u0308", "\u05D0" });
-
-    // ÷ 3031 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\"",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0022 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\"",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0027",
-                     new String[] { "\u3031" });
-
-    // ÷ 3031 × 0308 ÷ 0027 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
-                     new String[] { "\u3031\u0308" });
-
-    // ÷ 3031 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u00AD",
-                     new String[] { "\u3031\u00AD" });
-
-    // ÷ 3031 × 0308 × 00AD ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
-                     new String[] { "\u3031\u0308\u00AD" });
-
-    // ÷ 3031 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0300",
-                     new String[] { "\u3031\u0300" });
-
-    // ÷ 3031 × 0308 × 0300 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
-                     new String[] { "\u3031\u0308\u0300" });
-
-    // ÷ 3031 ÷ 0061 × 2060 ÷  #  ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
-                     new String[] { "\u3031", "\u0061\u2060" });
-
-    // รท 3031 ร— 0308 รท 0061 ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
-                     new String[] { "\u3031\u0308", "\u0061\u2060" });
-
-    // รท 3031 รท 0061 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
-                     new String[] { "\u3031", "\u0061" });
-
-    // รท 3031 ร— 0308 รท 0061 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // รท 3031 รท 0061 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
-                     new String[] { "\u3031", "\u0061" });
-
-    // รท 3031 ร— 0308 รท 0061 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // รท 3031 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
-                     new String[] { "\u3031", "\u0061" });
-
-    // รท 3031 ร— 0308 รท 0061 รท 0027 ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // รท 3031 รท 0061 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
-                     new String[] { "\u3031", "\u0061" });
-
-    // รท 3031 ร— 0308 รท 0061 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
-                     new String[] { "\u3031\u0308", "\u0061" });
-
-    // รท 3031 รท 0031 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
-                     new String[] { "\u3031", "\u0031" });
-
-    // รท 3031 ร— 0308 รท 0031 รท 003A รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // รท 3031 รท 0031 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
-                     new String[] { "\u3031", "\u0031" });
-
-    // รท 3031 ร— 0308 รท 0031 รท 0027 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // รท 3031 รท 0031 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
-                     new String[] { "\u3031", "\u0031" });
-
-    // รท 3031 ร— 0308 รท 0031 รท 002C รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // รท 3031 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
-                     new String[] { "\u3031", "\u0031" });
-
-    // รท 3031 ร— 0308 รท 0031 รท 002E ร— 2060 รท  #  รท [0.2] VERTICAL KANA REPEAT MARK (Katakana) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
-                     new String[] { "\u3031\u0308", "\u0031" });
-
-    // รท 0041 รท 0001 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0001",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 0001 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 รท 000D รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\r",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 000D รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\r",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 รท 000A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\n",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 000A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\n",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 รท 000B รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u000B",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 000B รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 รท 3031 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u3031",
-                     new String[] { "\u0041", "\u3031" });
-
-    // รท 0041 ร— 0308 รท 3031 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
-                     new String[] { "\u0041\u0308", "\u3031" });
-
-    // รท 0041 ร— 0041 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0041",
-                     new String[] { "\u0041\u0041" });
-
-    // รท 0041 ร— 0308 ร— 0041 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
-                     new String[] { "\u0041\u0308\u0041" });
-
-    // รท 0041 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u003A",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u002C",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 รท 002E รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u002E",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 002E รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 ร— 0030 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [9.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0030",
-                     new String[] { "\u0041\u0030" });
-
-    // รท 0041 ร— 0308 ร— 0030 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [9.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
-                     new String[] { "\u0041\u0308\u0030" });
-
-    // รท 0041 ร— 005F รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [13.1] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u005F",
-                     new String[] { "\u0041\u005F" });
-
-    // รท 0041 ร— 0308 ร— 005F รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [13.1] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
-                     new String[] { "\u0041\u0308\u005F" });
-
-    // รท 0041 รท 1F1E6 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 1F1E6 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 ร— 05D0 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u05D0",
-                     new String[] { "\u0041\u05D0" });
-
-    // รท 0041 ร— 0308 ร— 05D0 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
-                     new String[] { "\u0041\u0308\u05D0" });
-
-    // รท 0041 รท 0022 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\"",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 0022 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\"",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0027",
-                     new String[] { "\u0041" });
-
-    // รท 0041 ร— 0308 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
-                     new String[] { "\u0041\u0308" });
-
-    // รท 0041 ร— 00AD รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u00AD",
-                     new String[] { "\u0041\u00AD" });
-
-    // รท 0041 ร— 0308 ร— 00AD รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u00AD",
-                     new String[] { "\u0041\u0308\u00AD" });
-
-    // รท 0041 ร— 0300 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0300",
-                     new String[] { "\u0041\u0300" });
-
-    // รท 0041 ร— 0308 ร— 0300 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0300",
-                     new String[] { "\u0041\u0308\u0300" });
-
-    // รท 0041 ร— 0061 ร— 2060 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u2060",
-                     new String[] { "\u0041\u0061\u2060" });
-
-    // รท 0041 ร— 0308 ร— 0061 ร— 2060 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] LATIN SMALL LETTER A (ALetter) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u2060",
-                     new String[] { "\u0041\u0308\u0061\u2060" });
-
-    // รท 0041 ร— 0061 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u003A",
-                     new String[] { "\u0041\u0061" });
-
-    // รท 0041 ร— 0308 ร— 0061 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u003A",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // รท 0041 ร— 0061 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u0027",
-                     new String[] { "\u0041\u0061" });
-
-    // รท 0041 ร— 0308 ร— 0061 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // รท 0041 ร— 0061 รท 0027 ร— 2060 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u0027\u2060",
-                     new String[] { "\u0041\u0061" });
-
-    // รท 0041 ร— 0308 ร— 0061 รท 0027 ร— 2060 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] APOSTROPHE (Single_Quote) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027\u2060",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // รท 0041 ร— 0061 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0061\u002C",
-                     new String[] { "\u0041\u0061" });
-
-    // รท 0041 ร— 0308 ร— 0061 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [5.0] LATIN SMALL LETTER A (ALetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u002C",
-                     new String[] { "\u0041\u0308\u0061" });
-
-    // รท 0041 ร— 0031 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u003A",
-                     new String[] { "\u0041\u0031" });
-
-    // รท 0041 ร— 0308 ร— 0031 รท 003A รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u003A",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // รท 0041 ร— 0031 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u0027",
-                     new String[] { "\u0041\u0031" });
-
-    // รท 0041 ร— 0308 ร— 0031 รท 0027 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u0027",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // รท 0041 ร— 0031 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u002C",
-                     new String[] { "\u0041\u0031" });
-
-    // รท 0041 ร— 0308 ร— 0031 รท 002C รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002C",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // รท 0041 ร— 0031 รท 002E ร— 2060 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0031\u002E\u2060",
-                     new String[] { "\u0041\u0031" });
-
-    // รท 0041 ร— 0308 ร— 0031 รท 002E ร— 2060 รท  #  รท [0.2] LATIN CAPITAL LETTER A (ALetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [9.0] DIGIT ONE (Numeric) รท [999.0] FULL STOP (MidNumLet) ร— [4.0] WORD JOINER (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002E\u2060",
-                     new String[] { "\u0041\u0308\u0031" });
-
-    // รท 003A รท 0001 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0001",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 0001 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] <START OF HEADING> (Other) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0001",
-                     new String[] {  });
-
-    // รท 003A รท 000D รท  #  รท [0.2] COLON (MidLetter) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\r",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 000D รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <CARRIAGE RETURN (CR)> (CR) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\r",
-                     new String[] {  });
-
-    // รท 003A รท 000A รท  #  รท [0.2] COLON (MidLetter) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\n",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 000A รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE FEED (LF)> (LF) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\n",
-                     new String[] {  });
-
-    // รท 003A รท 000B รท  #  รท [0.2] COLON (MidLetter) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u000B",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 000B รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [3.2] <LINE TABULATION> (Newline) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u000B",
-                     new String[] {  });
-
-    // รท 003A รท 3031 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 003A ร— 0308 รท 3031 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] VERTICAL KANA REPEAT MARK (Katakana) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u3031",
-                     new String[] { "\u3031" });
-
-    // รท 003A รท 0041 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 003A ร— 0308 รท 0041 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LATIN CAPITAL LETTER A (ALetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0041",
-                     new String[] { "\u0041" });
-
-    // รท 003A รท 003A รท  #  รท [0.2] COLON (MidLetter) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u003A",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 003A รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COLON (MidLetter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u003A",
-                     new String[] {  });
-
-    // รท 003A รท 002C รท  #  รท [0.2] COLON (MidLetter) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u002C",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 002C รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] COMMA (MidNum) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u002C",
-                     new String[] {  });
-
-    // รท 003A รท 002E รท  #  รท [0.2] COLON (MidLetter) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u002E",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 002E รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] FULL STOP (MidNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u002E",
-                     new String[] {  });
-
-    // รท 003A รท 0030 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 003A ร— 0308 รท 0030 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] DIGIT ZERO (Numeric) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0030",
-                     new String[] { "\u0030" });
-
-    // รท 003A รท 005F รท  #  รท [0.2] COLON (MidLetter) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u005F",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 005F รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] LOW LINE (ExtendNumLet) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u005F",
-                     new String[] {  });
-
-    // รท 003A รท 1F1E6 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 1F1E6 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\uD83C\uDDE6",
-                     new String[] {  });
-
-    // รท 003A รท 05D0 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 003A ร— 0308 รท 05D0 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] HEBREW LETTER ALEF (Hebrew_Letter) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u05D0",
-                     new String[] { "\u05D0" });
-
-    // รท 003A รท 0022 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\"",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 0022 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] QUOTATION MARK (Double_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\"",
-                     new String[] {  });
-
-    // รท 003A รท 0027 รท  #  รท [0.2] COLON (MidLetter) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0027",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 รท 0027 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) รท [999.0] APOSTROPHE (Single_Quote) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0027",
-                     new String[] {  });
-
-    // รท 003A ร— 00AD รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u00AD",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 ร— 00AD รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] SOFT HYPHEN (Format_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u00AD",
-                     new String[] {  });
-
-    // รท 003A ร— 0300 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0300",
-                     new String[] {  });
-
-    // รท 003A ร— 0308 ร— 0300 รท  #  รท [0.2] COLON (MidLetter) ร— [4.0] COMBINING DIAERESIS (Extend_FE) ร— [4.0] COMBINING GRAVE ACCENT (Extend_FE) รท [0.3]
-    assertAnalyzesTo(analyzer, "\u003A\u0308\u0300",
-                     new Str

<TRUNCATED>

[09/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
new file mode 100644
index 0000000..446253d
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateEmojiTokenizationTest.pl
@@ -0,0 +1,150 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+# Split the path this script was invoked with; $script_name is quoted in the
+# usage text and in the generated class comment, and $volume/$directory are
+# reused to place the generated Java file in the same directory as the script.
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+
+# The version is interpolated into the download URL, the Java class name and
+# the output file name, so the whole value must be X.Y.  The match is anchored
+# with \A and \z: an unanchored match would also accept any string that merely
+# contains X.Y somewhere (for example "11.0/../x").
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\A\d+\.\d+\z/) {
+    print STDERR "Usage: $script_name -v <version>\n";
+    print STDERR "\tversion must be of the form X.Y, e.g. 11.0\n"
+        if ($version);
+    exit 1;
+}
+
+# Names derived from the validated version, e.g. 11.0 yields the class
+# EmojiTokenizationTestUnicode_11_0 in EmojiTokenizationTestUnicode_11_0.java.
+my $url = "http://www.unicode.org/Public/emoji/${version}/emoji-test.txt";
+my $underscore_version = $version;
+$underscore_version =~ s/\./_/g;
+my $class_name = "EmojiTokenizationTestUnicode_${underscore_version}";
+my $output_filename = "${class_name}.java";
+
+# Fixed preamble of the generated Java source.  It interpolates $script_name,
+# $url and $class_name, and ends with the opening of the 'tests' array, whose
+# elements are read in pairs by the generated test(Analyzer) method:
+# tests[i] is used in the failure message and tests[i + 1] is the string that
+# must be tokenized as a single <EMOJI> token.
+my $header =<<"__HEADER__";
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by ${script_name}
+ * from: ${url}
+ *
+ * emoji-test.txt contains emoji char sequences, which are represented as
+ * tokenization tests in this class.
+ * 
+ */
+\@Ignore
+public class ${class_name} extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+    for (int i = 0 ; i < tests.length ; i += 2) {
+      String test = tests[i + 1];
+      try {
+        assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" });
+      } catch (Throwable t) {
+        throw new Exception("Failed to tokenize \\"" + tests[i] + "\\":", t);        
+      }
+    }
+  }
+
+  private String[] tests = new String[] {
+__HEADER__
+
+# Download emoji-test.txt and split it into lines (LF or CRLF terminated).
+my @tests = split /\r?\n/, get_URL_content($url);
+
+# The generated class is written into the same volume/directory as this script.
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+
+# Three-argument open with a lexical handle and low-precedence "or".  With
+# "||" the die clause bound to the (always true) file name string rather than
+# to open(), so a failed open was never reported.
+open my $out, '>', $output_path
+    or die "Error opening '$output_path' for writing: $!";
+
+print STDERR "Writing '$output_path'...";
+
+print {$out} $header;
+
+# Every data line contributes two consecutive array elements to the generated
+# Java array: the (whitespace-normalized) source line, then the test string
+# built from the code points in front of the first ';'.
+my $isFirst = 1;
+for my $line (@tests) {
+    next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
+
+    # Separate consecutive pairs; no separator is written before the first one.
+    print {$out} ",\n\n" unless $isFirst;
+    $isFirst = 0;
+
+    # Example line: 1F46E 1F3FB 200D 2642 FE0F                 ; fully-qualified     # 👮🏻‍♂️ man police officer: light skin tone
+    $line =~ s/\s+$//;     # Trim trailing whitespace
+    $line =~ s/\t/  /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
+    print {$out} "    \"$line\",\n";
+
+    # Take the space-separated hex code points before the first ';' and turn
+    # each one into a \uXXXX escape.
+    my ($test_string) = $line =~ /^(.*?)\s*;/;
+    $test_string =~ s/([0-9A-F]+)/\\u$1/g;
+    # Code points with five or more hex digits lie above the BMP and are
+    # replaced by the \u escapes of their UTF-16 surrogate pair.
+    $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
+    $test_string =~ s/\s//g;
+    print {$out} "    \"${test_string}\"";
+}
+print {$out} "  };\n}\n";
+
+# Buffered write errors only surface when the handle is closed, so check it.
+close $out
+    or die "Error closing '$output_path': $!";
+print STDERR "done.\n";
+
+
+# sub above_BMP_char_to_surrogates
+#
+# Takes the hex digits of a code point above the BMP (i.e., greater than
+# 0xFFFF) and returns the corresponding UTF-16 surrogate pair as a list of
+# two uppercase hex strings, zero-padded to at least four digits:
+# (high surrogate, low surrogate).
+#
+# Assumption: the input string consists of more than four hex digits
+#
+sub above_BMP_char_to_surrogates {
+    my ($hex_digits) = @_;
+    my $offset = hex($hex_digits) - 0x10000;
+    my @code_units = (
+        0xD800 + ($offset >> 10),      # high surrogate: bits above the low ten
+        0xDC00 + ($offset & 0x3FF),    # low surrogate: the low ten bits
+    );
+    return map { sprintf("%04X", $_) } @code_units;
+}
+
+
+# sub get_URL_content
+#
+# Issues an HTTP GET for the given URL through LWP::UserAgent and returns the
+# content of the response.  Progress is reported on STDERR.  If the response
+# is not successful, its status line is printed to STDERR and the script
+# exits with status 1.
+#
+sub get_URL_content {
+    my ($source_url) = @_;
+    print STDERR "Retrieving '$source_url'...";
+    my $response
+        = LWP::UserAgent->new->request(HTTP::Request->new(GET => $source_url));
+    if (!$response->is_success) {
+        print STDERR "Failed to download '$source_url':\n\t"
+            . $response->status_line . "\n";
+        exit 1;
+    }
+    print STDERR "done.\n";
+    return $response->content;
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
index 3004035..dd16cb6 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
@@ -40,8 +40,6 @@ $underscore_version =~ s/\./_/g;
 my $class_name = "WordBreakTestUnicode_${underscore_version}";
 my $output_filename = "${class_name}.java";
 my $header =<<"__HEADER__";
-package org.apache.lucene.analysis;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -59,6 +57,8 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+package org.apache.lucene.analysis.standard;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.junit.Ignore;
@@ -81,7 +81,7 @@ import org.junit.Ignore;
  *    \\p{WordBreak = Hebrew_Letter}
  *    \\p{WordBreak = Katakana}
  *    \\p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
- *    [\\uFF10-\\uFF19]                (Full-width Arabic digits)
+ *    [\\uFF10-\\uFF19]                 (Full-width Arabic digits)
  */
 \@Ignore
 public class ${class_name} extends BaseTokenStreamTestCase {
@@ -91,6 +91,7 @@ __HEADER__
 
 my $codepoints = [];
 map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
+my $regional_indicator_codepoints = [];
 # Complex_Context is an alias for 'SA', which is used in LineBreak.txt
 # Using lowercase versions of property value names to allow for case-
 # insensitive comparison with the names in the Unicode data files.
@@ -98,7 +99,9 @@ parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
 parse_Unicode_data_file($scripts_url, $codepoints, 
                         {'han' => 1, 'hiragana' => 1});
 parse_Unicode_data_file($word_break_url, $codepoints,
-                        {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
+                        {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1, 'e_base' => 1,
+                         'e_modifier' => 1, 'glue_after_zwj' => 1, 'e_base_gaz' => 1});
+parse_Unicode_data_file($word_break_url, $regional_indicator_codepoints, {'regional_indicator' => 1});
 my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
 
 my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
@@ -124,10 +127,21 @@ for my $line (@tests) {
   $test_string =~ s/\\u000D/\\r/g;
   $test_string =~ s/\\u0022/\\\"/g;
   $sequence =~ s/^\s*รท\s*//; # Trim leading break character
+  
+  # TODO: When upgrading JFlex to a version that supports Unicode 11.0+: remove the special case below for a Unicode 9.0 test data line that conflicts with TR#51 11.0 test data
+  # ÷ 200D ÷ 261D ÷  #  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] WHITE UP POINTING INDEX (E_Base) ÷ [0.3]
+  if ($sequence =~ /^200D\s*รท\s*261D$/) {
+    print OUT "    // Skipping this test because it conflicts with TR#51 v11.0 rules.\n\n";
+    next;
+  }
+  
   my @tokens = ();
+  my $isfirst = 0;
   for my $candidate (split /\s*รท\s*/, $sequence) {
+    $isfirst = 1;
     my @chars = ();
-    my $has_wanted_char = 0;
+    my $has_wanted_chars = 0;
+    my $prev_char_regional_indicator = 0;
     while ($candidate =~ /([0-9A-F]+)/gi) {
       my $hexchar = $1;
       if (4 == length($hexchar)) {
@@ -135,12 +149,21 @@ for my $line (@tests) {
       } else {
         push @chars, above_BMP_char_to_surrogates($hexchar);
       }
-      unless ($has_wanted_char) {
-        $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
+      unless ($has_wanted_chars) {
+        my $codepoint = hex($hexchar);
+        if (defined($codepoints->[$codepoint])) {
+          $has_wanted_chars = 1;
+        } elsif (defined($regional_indicator_codepoints->[$codepoint])) {
+          if (1 == $prev_char_regional_indicator) {
+            $has_wanted_chars = 1; # must be 2 regional indicators in a row
+          } else {
+            $prev_char_regional_indicator = 1;
+          }
+        }
       }
     }
-    if ($has_wanted_char) {
-      push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
+    if ($has_wanted_chars) {
+      push @tokens, '"'.join('', map { $_ eq "0022" ? "\\\"" : "\\u$_" } @chars).'"';
     }
   }
   print OUT "    assertAnalyzesTo(analyzer, \"${test_string}\",\n";


[20/24] lucene-solr:branch_7x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8c65da6/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
new file mode 100644
index 0000000..5e99ef4
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/EmojiTokenizationTestUnicode_11_0.java
@@ -0,0 +1,10756 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by generateEmojiTokenizationTest.pl
+ * from: http://www.unicode.org/Public/emoji/11.0/emoji-test.txt
+ *
+ * emoji-test.txt contains emoji char sequences, which are represented as
+ * tokenization tests in this class.
+ * 
+ */
+@Ignore
+public class EmojiTokenizationTestUnicode_11_0 extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception { // runs every entry of the tests array through the given analyzer
+    for (int i = 0 ; i < tests.length ; i += 2) { // tests holds pairs: [i] = emoji-test.txt line, [i + 1] = its escaped char sequence
+      String test = tests[i + 1];
+      try {
+        assertAnalyzesTo(analyzer, test, new String[] { test }, new String[] { "<EMOJI>" }); // expected output: the input itself as the single token, with type <EMOJI>
+      } catch (Throwable t) {
+        throw new Exception("Failed to tokenize \"" + tests[i] + "\":", t);        // wrap with the source line so the failing sequence is identifiable
+      }
+    }
+  }
+
+  private String[] tests = new String[] {
+    "1F600                                      ; fully-qualified     # ๐Ÿ˜€ grinning face",
+    "\uD83D\uDE00",
+
+    "1F601                                      ; fully-qualified     # ๐Ÿ˜ beaming face with smiling eyes",
+    "\uD83D\uDE01",
+
+    "1F602                                      ; fully-qualified     # ๐Ÿ˜‚ face with tears of joy",
+    "\uD83D\uDE02",
+
+    "1F923                                      ; fully-qualified     # ๐Ÿคฃ rolling on the floor laughing",
+    "\uD83E\uDD23",
+
+    "1F603                                      ; fully-qualified     # ๐Ÿ˜ƒ grinning face with big eyes",
+    "\uD83D\uDE03",
+
+    "1F604                                      ; fully-qualified     # ๐Ÿ˜„ grinning face with smiling eyes",
+    "\uD83D\uDE04",
+
+    "1F605                                      ; fully-qualified     # ๐Ÿ˜… grinning face with sweat",
+    "\uD83D\uDE05",
+
+    "1F606                                      ; fully-qualified     # ๐Ÿ˜† grinning squinting face",
+    "\uD83D\uDE06",
+
+    "1F609                                      ; fully-qualified     # ๐Ÿ˜‰ winking face",
+    "\uD83D\uDE09",
+
+    "1F60A                                      ; fully-qualified     # ๐Ÿ˜Š smiling face with smiling eyes",
+    "\uD83D\uDE0A",
+
+    "1F60B                                      ; fully-qualified     # ๐Ÿ˜‹ face savoring food",
+    "\uD83D\uDE0B",
+
+    "1F60E                                      ; fully-qualified     # ๐Ÿ˜Ž smiling face with sunglasses",
+    "\uD83D\uDE0E",
+
+    "1F60D                                      ; fully-qualified     # ๐Ÿ˜ smiling face with heart-eyes",
+    "\uD83D\uDE0D",
+
+    "1F618                                      ; fully-qualified     # ๐Ÿ˜˜ face blowing a kiss",
+    "\uD83D\uDE18",
+
+    "1F970                                      ; fully-qualified     # ๐Ÿฅฐ smiling face with 3 hearts",
+    "\uD83E\uDD70",
+
+    "1F617                                      ; fully-qualified     # ๐Ÿ˜— kissing face",
+    "\uD83D\uDE17",
+
+    "1F619                                      ; fully-qualified     # ๐Ÿ˜™ kissing face with smiling eyes",
+    "\uD83D\uDE19",
+
+    "1F61A                                      ; fully-qualified     # ๐Ÿ˜š kissing face with closed eyes",
+    "\uD83D\uDE1A",
+
+    "263A FE0F                                  ; fully-qualified     # โ˜บ๏ธ smiling face",
+    "\u263A\uFE0F",
+
+    "263A                                       ; non-fully-qualified # โ˜บ smiling face",
+    "\u263A",
+
+    "1F642                                      ; fully-qualified     # ๐Ÿ™‚ slightly smiling face",
+    "\uD83D\uDE42",
+
+    "1F917                                      ; fully-qualified     # ๐Ÿค— hugging face",
+    "\uD83E\uDD17",
+
+    "1F929                                      ; fully-qualified     # ๐Ÿคฉ star-struck",
+    "\uD83E\uDD29",
+
+    "1F914                                      ; fully-qualified     # ๐Ÿค” thinking face",
+    "\uD83E\uDD14",
+
+    "1F928                                      ; fully-qualified     # ๐Ÿคจ face with raised eyebrow",
+    "\uD83E\uDD28",
+
+    "1F610                                      ; fully-qualified     # ๐Ÿ˜ neutral face",
+    "\uD83D\uDE10",
+
+    "1F611                                      ; fully-qualified     # ๐Ÿ˜‘ expressionless face",
+    "\uD83D\uDE11",
+
+    "1F636                                      ; fully-qualified     # ๐Ÿ˜ถ face without mouth",
+    "\uD83D\uDE36",
+
+    "1F644                                      ; fully-qualified     # ๐Ÿ™„ face with rolling eyes",
+    "\uD83D\uDE44",
+
+    "1F60F                                      ; fully-qualified     # ๐Ÿ˜ smirking face",
+    "\uD83D\uDE0F",
+
+    "1F623                                      ; fully-qualified     # ๐Ÿ˜ฃ persevering face",
+    "\uD83D\uDE23",
+
+    "1F625                                      ; fully-qualified     # ๐Ÿ˜ฅ sad but relieved face",
+    "\uD83D\uDE25",
+
+    "1F62E                                      ; fully-qualified     # ๐Ÿ˜ฎ face with open mouth",
+    "\uD83D\uDE2E",
+
+    "1F910                                      ; fully-qualified     # ๐Ÿค zipper-mouth face",
+    "\uD83E\uDD10",
+
+    "1F62F                                      ; fully-qualified     # ๐Ÿ˜ฏ hushed face",
+    "\uD83D\uDE2F",
+
+    "1F62A                                      ; fully-qualified     # ๐Ÿ˜ช sleepy face",
+    "\uD83D\uDE2A",
+
+    "1F62B                                      ; fully-qualified     # ๐Ÿ˜ซ tired face",
+    "\uD83D\uDE2B",
+
+    "1F634                                      ; fully-qualified     # ๐Ÿ˜ด sleeping face",
+    "\uD83D\uDE34",
+
+    "1F60C                                      ; fully-qualified     # ๐Ÿ˜Œ relieved face",
+    "\uD83D\uDE0C",
+
+    "1F61B                                      ; fully-qualified     # ๐Ÿ˜› face with tongue",
+    "\uD83D\uDE1B",
+
+    "1F61C                                      ; fully-qualified     # ๐Ÿ˜œ winking face with tongue",
+    "\uD83D\uDE1C",
+
+    "1F61D                                      ; fully-qualified     # ๐Ÿ˜ squinting face with tongue",
+    "\uD83D\uDE1D",
+
+    "1F924                                      ; fully-qualified     # ๐Ÿคค drooling face",
+    "\uD83E\uDD24",
+
+    "1F612                                      ; fully-qualified     # ๐Ÿ˜’ unamused face",
+    "\uD83D\uDE12",
+
+    "1F613                                      ; fully-qualified     # ๐Ÿ˜“ downcast face with sweat",
+    "\uD83D\uDE13",
+
+    "1F614                                      ; fully-qualified     # ๐Ÿ˜” pensive face",
+    "\uD83D\uDE14",
+
+    "1F615                                      ; fully-qualified     # ๐Ÿ˜• confused face",
+    "\uD83D\uDE15",
+
+    "1F643                                      ; fully-qualified     # ๐Ÿ™ƒ upside-down face",
+    "\uD83D\uDE43",
+
+    "1F911                                      ; fully-qualified     # ๐Ÿค‘ money-mouth face",
+    "\uD83E\uDD11",
+
+    "1F632                                      ; fully-qualified     # ๐Ÿ˜ฒ astonished face",
+    "\uD83D\uDE32",
+
+    "2639 FE0F                                  ; fully-qualified     # โ˜น๏ธ frowning face",
+    "\u2639\uFE0F",
+
+    "2639                                       ; non-fully-qualified # โ˜น frowning face",
+    "\u2639",
+
+    "1F641                                      ; fully-qualified     # ๐Ÿ™ slightly frowning face",
+    "\uD83D\uDE41",
+
+    "1F616                                      ; fully-qualified     # ๐Ÿ˜– confounded face",
+    "\uD83D\uDE16",
+
+    "1F61E                                      ; fully-qualified     # ๐Ÿ˜ž disappointed face",
+    "\uD83D\uDE1E",
+
+    "1F61F                                      ; fully-qualified     # ๐Ÿ˜Ÿ worried face",
+    "\uD83D\uDE1F",
+
+    "1F624                                      ; fully-qualified     # ๐Ÿ˜ค face with steam from nose",
+    "\uD83D\uDE24",
+
+    "1F622                                      ; fully-qualified     # ๐Ÿ˜ข crying face",
+    "\uD83D\uDE22",
+
+    "1F62D                                      ; fully-qualified     # ๐Ÿ˜ญ loudly crying face",
+    "\uD83D\uDE2D",
+
+    "1F626                                      ; fully-qualified     # ๐Ÿ˜ฆ frowning face with open mouth",
+    "\uD83D\uDE26",
+
+    "1F627                                      ; fully-qualified     # ๐Ÿ˜ง anguished face",
+    "\uD83D\uDE27",
+
+    "1F628                                      ; fully-qualified     # ๐Ÿ˜จ fearful face",
+    "\uD83D\uDE28",
+
+    "1F629                                      ; fully-qualified     # ๐Ÿ˜ฉ weary face",
+    "\uD83D\uDE29",
+
+    "1F92F                                      ; fully-qualified     # ๐Ÿคฏ exploding head",
+    "\uD83E\uDD2F",
+
+    "1F62C                                      ; fully-qualified     # ๐Ÿ˜ฌ grimacing face",
+    "\uD83D\uDE2C",
+
+    "1F630                                      ; fully-qualified     # ๐Ÿ˜ฐ anxious face with sweat",
+    "\uD83D\uDE30",
+
+    "1F631                                      ; fully-qualified     # ๐Ÿ˜ฑ face screaming in fear",
+    "\uD83D\uDE31",
+
+    "1F975                                      ; fully-qualified     # ๐Ÿฅต hot face",
+    "\uD83E\uDD75",
+
+    "1F976                                      ; fully-qualified     # ๐Ÿฅถ cold face",
+    "\uD83E\uDD76",
+
+    "1F633                                      ; fully-qualified     # ๐Ÿ˜ณ flushed face",
+    "\uD83D\uDE33",
+
+    "1F92A                                      ; fully-qualified     # ๐Ÿคช zany face",
+    "\uD83E\uDD2A",
+
+    "1F635                                      ; fully-qualified     # ๐Ÿ˜ต dizzy face",
+    "\uD83D\uDE35",
+
+    "1F621                                      ; fully-qualified     # ๐Ÿ˜ก pouting face",
+    "\uD83D\uDE21",
+
+    "1F620                                      ; fully-qualified     # ๐Ÿ˜  angry face",
+    "\uD83D\uDE20",
+
+    "1F92C                                      ; fully-qualified     # ๐Ÿคฌ face with symbols on mouth",
+    "\uD83E\uDD2C",
+
+    "1F637                                      ; fully-qualified     # ๐Ÿ˜ท face with medical mask",
+    "\uD83D\uDE37",
+
+    "1F912                                      ; fully-qualified     # ๐Ÿค’ face with thermometer",
+    "\uD83E\uDD12",
+
+    "1F915                                      ; fully-qualified     # ๐Ÿค• face with head-bandage",
+    "\uD83E\uDD15",
+
+    "1F922                                      ; fully-qualified     # ๐Ÿคข nauseated face",
+    "\uD83E\uDD22",
+
+    "1F92E                                      ; fully-qualified     # ๐Ÿคฎ face vomiting",
+    "\uD83E\uDD2E",
+
+    "1F927                                      ; fully-qualified     # ๐Ÿคง sneezing face",
+    "\uD83E\uDD27",
+
+    "1F607                                      ; fully-qualified     # ๐Ÿ˜‡ smiling face with halo",
+    "\uD83D\uDE07",
+
+    "1F920                                      ; fully-qualified     # ๐Ÿค  cowboy hat face",
+    "\uD83E\uDD20",
+
+    "1F973                                      ; fully-qualified     # ๐Ÿฅณ partying face",
+    "\uD83E\uDD73",
+
+    "1F974                                      ; fully-qualified     # ๐Ÿฅด woozy face",
+    "\uD83E\uDD74",
+
+    "1F97A                                      ; fully-qualified     # ๐Ÿฅบ pleading face",
+    "\uD83E\uDD7A",
+
+    "1F925                                      ; fully-qualified     # ๐Ÿคฅ lying face",
+    "\uD83E\uDD25",
+
+    "1F92B                                      ; fully-qualified     # ๐Ÿคซ shushing face",
+    "\uD83E\uDD2B",
+
+    "1F92D                                      ; fully-qualified     # ๐Ÿคญ face with hand over mouth",
+    "\uD83E\uDD2D",
+
+    "1F9D0                                      ; fully-qualified     # ๐Ÿง face with monocle",
+    "\uD83E\uDDD0",
+
+    "1F913                                      ; fully-qualified     # ๐Ÿค“ nerd face",
+    "\uD83E\uDD13",
+
+    "1F608                                      ; fully-qualified     # ๐Ÿ˜ˆ smiling face with horns",
+    "\uD83D\uDE08",
+
+    "1F47F                                      ; fully-qualified     # ๐Ÿ‘ฟ angry face with horns",
+    "\uD83D\uDC7F",
+
+    "1F921                                      ; fully-qualified     # ๐Ÿคก clown face",
+    "\uD83E\uDD21",
+
+    "1F479                                      ; fully-qualified     # ๐Ÿ‘น ogre",
+    "\uD83D\uDC79",
+
+    "1F47A                                      ; fully-qualified     # ๐Ÿ‘บ goblin",
+    "\uD83D\uDC7A",
+
+    "1F480                                      ; fully-qualified     # ๐Ÿ’€ skull",
+    "\uD83D\uDC80",
+
+    "2620 FE0F                                  ; fully-qualified     # โ˜ ๏ธ skull and crossbones",
+    "\u2620\uFE0F",
+
+    "2620                                       ; non-fully-qualified # โ˜  skull and crossbones",
+    "\u2620",
+
+    "1F47B                                      ; fully-qualified     # ๐Ÿ‘ป ghost",
+    "\uD83D\uDC7B",
+
+    "1F47D                                      ; fully-qualified     # ๐Ÿ‘ฝ alien",
+    "\uD83D\uDC7D",
+
+    "1F47E                                      ; fully-qualified     # ๐Ÿ‘พ alien monster",
+    "\uD83D\uDC7E",
+
+    "1F916                                      ; fully-qualified     # ๐Ÿค– robot face",
+    "\uD83E\uDD16",
+
+    "1F4A9                                      ; fully-qualified     # ๐Ÿ’ฉ pile of poo",
+    "\uD83D\uDCA9",
+
+    "1F63A                                      ; fully-qualified     # ๐Ÿ˜บ grinning cat face",
+    "\uD83D\uDE3A",
+
+    "1F638                                      ; fully-qualified     # ๐Ÿ˜ธ grinning cat face with smiling eyes",
+    "\uD83D\uDE38",
+
+    "1F639                                      ; fully-qualified     # ๐Ÿ˜น cat face with tears of joy",
+    "\uD83D\uDE39",
+
+    "1F63B                                      ; fully-qualified     # ๐Ÿ˜ป smiling cat face with heart-eyes",
+    "\uD83D\uDE3B",
+
+    "1F63C                                      ; fully-qualified     # ๐Ÿ˜ผ cat face with wry smile",
+    "\uD83D\uDE3C",
+
+    "1F63D                                      ; fully-qualified     # ๐Ÿ˜ฝ kissing cat face",
+    "\uD83D\uDE3D",
+
+    "1F640                                      ; fully-qualified     # ๐Ÿ™€ weary cat face",
+    "\uD83D\uDE40",
+
+    "1F63F                                      ; fully-qualified     # ๐Ÿ˜ฟ crying cat face",
+    "\uD83D\uDE3F",
+
+    "1F63E                                      ; fully-qualified     # ๐Ÿ˜พ pouting cat face",
+    "\uD83D\uDE3E",
+
+    "1F648                                      ; fully-qualified     # ๐Ÿ™ˆ see-no-evil monkey",
+    "\uD83D\uDE48",
+
+    "1F649                                      ; fully-qualified     # ๐Ÿ™‰ hear-no-evil monkey",
+    "\uD83D\uDE49",
+
+    "1F64A                                      ; fully-qualified     # ๐Ÿ™Š speak-no-evil monkey",
+    "\uD83D\uDE4A",
+
+    "1F3FB                                      ; fully-qualified     # ๐Ÿป light skin tone",
+    "\uD83C\uDFFB",
+
+    "1F3FC                                      ; fully-qualified     # ๐Ÿผ medium-light skin tone",
+    "\uD83C\uDFFC",
+
+    "1F3FD                                      ; fully-qualified     # ๐Ÿฝ medium skin tone",
+    "\uD83C\uDFFD",
+
+    "1F3FE                                      ; fully-qualified     # ๐Ÿพ medium-dark skin tone",
+    "\uD83C\uDFFE",
+
+    "1F3FF                                      ; fully-qualified     # ๐Ÿฟ dark skin tone",
+    "\uD83C\uDFFF",
+
+    "1F476                                      ; fully-qualified     # ๐Ÿ‘ถ baby",
+    "\uD83D\uDC76",
+
+    "1F476 1F3FB                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿป baby: light skin tone",
+    "\uD83D\uDC76\uD83C\uDFFB",
+
+    "1F476 1F3FC                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿผ baby: medium-light skin tone",
+    "\uD83D\uDC76\uD83C\uDFFC",
+
+    "1F476 1F3FD                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿฝ baby: medium skin tone",
+    "\uD83D\uDC76\uD83C\uDFFD",
+
+    "1F476 1F3FE                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿพ baby: medium-dark skin tone",
+    "\uD83D\uDC76\uD83C\uDFFE",
+
+    "1F476 1F3FF                                ; fully-qualified     # ๐Ÿ‘ถ๐Ÿฟ baby: dark skin tone",
+    "\uD83D\uDC76\uD83C\uDFFF",
+
+    "1F9D2                                      ; fully-qualified     # ๐Ÿง’ child",
+    "\uD83E\uDDD2",
+
+    "1F9D2 1F3FB                                ; fully-qualified     # ๐Ÿง’๐Ÿป child: light skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFB",
+
+    "1F9D2 1F3FC                                ; fully-qualified     # ๐Ÿง’๐Ÿผ child: medium-light skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFC",
+
+    "1F9D2 1F3FD                                ; fully-qualified     # ๐Ÿง’๐Ÿฝ child: medium skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFD",
+
+    "1F9D2 1F3FE                                ; fully-qualified     # ๐Ÿง’๐Ÿพ child: medium-dark skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFE",
+
+    "1F9D2 1F3FF                                ; fully-qualified     # ๐Ÿง’๐Ÿฟ child: dark skin tone",
+    "\uD83E\uDDD2\uD83C\uDFFF",
+
+    "1F466                                      ; fully-qualified     # ๐Ÿ‘ฆ boy",
+    "\uD83D\uDC66",
+
+    "1F466 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿป boy: light skin tone",
+    "\uD83D\uDC66\uD83C\uDFFB",
+
+    "1F466 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿผ boy: medium-light skin tone",
+    "\uD83D\uDC66\uD83C\uDFFC",
+
+    "1F466 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿฝ boy: medium skin tone",
+    "\uD83D\uDC66\uD83C\uDFFD",
+
+    "1F466 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿพ boy: medium-dark skin tone",
+    "\uD83D\uDC66\uD83C\uDFFE",
+
+    "1F466 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฆ๐Ÿฟ boy: dark skin tone",
+    "\uD83D\uDC66\uD83C\uDFFF",
+
+    "1F467                                      ; fully-qualified     # ๐Ÿ‘ง girl",
+    "\uD83D\uDC67",
+
+    "1F467 1F3FB                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿป girl: light skin tone",
+    "\uD83D\uDC67\uD83C\uDFFB",
+
+    "1F467 1F3FC                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿผ girl: medium-light skin tone",
+    "\uD83D\uDC67\uD83C\uDFFC",
+
+    "1F467 1F3FD                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿฝ girl: medium skin tone",
+    "\uD83D\uDC67\uD83C\uDFFD",
+
+    "1F467 1F3FE                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿพ girl: medium-dark skin tone",
+    "\uD83D\uDC67\uD83C\uDFFE",
+
+    "1F467 1F3FF                                ; fully-qualified     # ๐Ÿ‘ง๐Ÿฟ girl: dark skin tone",
+    "\uD83D\uDC67\uD83C\uDFFF",
+
+    "1F9D1                                      ; fully-qualified     # ๐Ÿง‘ adult",
+    "\uD83E\uDDD1",
+
+    "1F9D1 1F3FB                                ; fully-qualified     # ๐Ÿง‘๐Ÿป adult: light skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFB",
+
+    "1F9D1 1F3FC                                ; fully-qualified     # ๐Ÿง‘๐Ÿผ adult: medium-light skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFC",
+
+    "1F9D1 1F3FD                                ; fully-qualified     # ๐Ÿง‘๐Ÿฝ adult: medium skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFD",
+
+    "1F9D1 1F3FE                                ; fully-qualified     # ๐Ÿง‘๐Ÿพ adult: medium-dark skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFE",
+
+    "1F9D1 1F3FF                                ; fully-qualified     # ๐Ÿง‘๐Ÿฟ adult: dark skin tone",
+    "\uD83E\uDDD1\uD83C\uDFFF",
+
+    "1F468                                      ; fully-qualified     # ๐Ÿ‘จ man",
+    "\uD83D\uDC68",
+
+    "1F468 1F3FB                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿป man: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB",
+
+    "1F468 1F3FC                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿผ man: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC",
+
+    "1F468 1F3FD                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝ man: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD",
+
+    "1F468 1F3FE                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿพ man: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE",
+
+    "1F468 1F3FF                                ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟ man: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF",
+
+    "1F469                                      ; fully-qualified     # ๐Ÿ‘ฉ woman",
+    "\uD83D\uDC69",
+
+    "1F469 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿป woman: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB",
+
+    "1F469 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผ woman: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC",
+
+    "1F469 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝ woman: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD",
+
+    "1F469 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพ woman: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE",
+
+    "1F469 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟ woman: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF",
+
+    "1F9D3                                      ; fully-qualified     # ๐Ÿง“ older adult",
+    "\uD83E\uDDD3",
+
+    "1F9D3 1F3FB                                ; fully-qualified     # ๐Ÿง“๐Ÿป older adult: light skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFB",
+
+    "1F9D3 1F3FC                                ; fully-qualified     # ๐Ÿง“๐Ÿผ older adult: medium-light skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFC",
+
+    "1F9D3 1F3FD                                ; fully-qualified     # ๐Ÿง“๐Ÿฝ older adult: medium skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFD",
+
+    "1F9D3 1F3FE                                ; fully-qualified     # ๐Ÿง“๐Ÿพ older adult: medium-dark skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFE",
+
+    "1F9D3 1F3FF                                ; fully-qualified     # ๐Ÿง“๐Ÿฟ older adult: dark skin tone",
+    "\uD83E\uDDD3\uD83C\uDFFF",
+
+    "1F474                                      ; fully-qualified     # ๐Ÿ‘ด old man",
+    "\uD83D\uDC74",
+
+    "1F474 1F3FB                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿป old man: light skin tone",
+    "\uD83D\uDC74\uD83C\uDFFB",
+
+    "1F474 1F3FC                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿผ old man: medium-light skin tone",
+    "\uD83D\uDC74\uD83C\uDFFC",
+
+    "1F474 1F3FD                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿฝ old man: medium skin tone",
+    "\uD83D\uDC74\uD83C\uDFFD",
+
+    "1F474 1F3FE                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿพ old man: medium-dark skin tone",
+    "\uD83D\uDC74\uD83C\uDFFE",
+
+    "1F474 1F3FF                                ; fully-qualified     # ๐Ÿ‘ด๐Ÿฟ old man: dark skin tone",
+    "\uD83D\uDC74\uD83C\uDFFF",
+
+    "1F475                                      ; fully-qualified     # ๐Ÿ‘ต old woman",
+    "\uD83D\uDC75",
+
+    "1F475 1F3FB                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿป old woman: light skin tone",
+    "\uD83D\uDC75\uD83C\uDFFB",
+
+    "1F475 1F3FC                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿผ old woman: medium-light skin tone",
+    "\uD83D\uDC75\uD83C\uDFFC",
+
+    "1F475 1F3FD                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿฝ old woman: medium skin tone",
+    "\uD83D\uDC75\uD83C\uDFFD",
+
+    "1F475 1F3FE                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿพ old woman: medium-dark skin tone",
+    "\uD83D\uDC75\uD83C\uDFFE",
+
+    "1F475 1F3FF                                ; fully-qualified     # ๐Ÿ‘ต๐Ÿฟ old woman: dark skin tone",
+    "\uD83D\uDC75\uD83C\uDFFF",
+
+    "1F468 200D 2695 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โš•๏ธ man health worker",
+    "\uD83D\uDC68\u200D\u2695\uFE0F",
+
+    "1F468 200D 2695                            ; non-fully-qualified # ๐Ÿ‘จโ€โš• man health worker",
+    "\uD83D\uDC68\u200D\u2695",
+
+    "1F468 1F3FB 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โš•๏ธ man health worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2695\uFE0F",
+
+    "1F468 1F3FB 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โš• man health worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2695",
+
+    "1F468 1F3FC 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โš•๏ธ man health worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695\uFE0F",
+
+    "1F468 1F3FC 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โš• man health worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2695",
+
+    "1F468 1F3FD 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โš•๏ธ man health worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2695\uFE0F",
+
+    "1F468 1F3FD 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โš• man health worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2695",
+
+    "1F468 1F3FE 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โš•๏ธ man health worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2695\uFE0F",
+
+    "1F468 1F3FE 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โš• man health worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2695",
+
+    "1F468 1F3FF 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โš•๏ธ man health worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2695\uFE0F",
+
+    "1F468 1F3FF 200D 2695                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โš• man health worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2695",
+
+    "1F469 200D 2695 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โš•๏ธ woman health worker",
+    "\uD83D\uDC69\u200D\u2695\uFE0F",
+
+    "1F469 200D 2695                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โš• woman health worker",
+    "\uD83D\uDC69\u200D\u2695",
+
+    "1F469 1F3FB 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โš•๏ธ woman health worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2695\uFE0F",
+
+    "1F469 1F3FB 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โš• woman health worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2695",
+
+    "1F469 1F3FC 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โš•๏ธ woman health worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2695\uFE0F",
+
+    "1F469 1F3FC 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โš• woman health worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2695",
+
+    "1F469 1F3FD 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โš•๏ธ woman health worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2695\uFE0F",
+
+    "1F469 1F3FD 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โš• woman health worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2695",
+
+    "1F469 1F3FE 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โš•๏ธ woman health worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2695\uFE0F",
+
+    "1F469 1F3FE 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โš• woman health worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2695",
+
+    "1F469 1F3FF 200D 2695 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โš•๏ธ woman health worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2695\uFE0F",
+
+    "1F469 1F3FF 200D 2695                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โš• woman health worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2695",
+
+    "1F468 200D 1F393                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽ“ man student",
+    "\uD83D\uDC68\u200D\uD83C\uDF93",
+
+    "1F468 1F3FB 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽ“ man student: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF93",
+
+    "1F468 1F3FC 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽ“ man student: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF93",
+
+    "1F468 1F3FD 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽ“ man student: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF93",
+
+    "1F468 1F3FE 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽ“ man student: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF93",
+
+    "1F468 1F3FF 200D 1F393                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽ“ man student: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF93",
+
+    "1F469 200D 1F393                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽ“ woman student",
+    "\uD83D\uDC69\u200D\uD83C\uDF93",
+
+    "1F469 1F3FB 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽ“ woman student: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF93",
+
+    "1F469 1F3FC 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽ“ woman student: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF93",
+
+    "1F469 1F3FD 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽ“ woman student: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF93",
+
+    "1F469 1F3FE 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽ“ woman student: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF93",
+
+    "1F469 1F3FF 200D 1F393                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽ“ woman student: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF93",
+
+    "1F468 200D 1F3EB                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿซ man teacher",
+    "\uD83D\uDC68\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FB 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿซ man teacher: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FC 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿซ man teacher: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FD 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿซ man teacher: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FE 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿซ man teacher: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFEB",
+
+    "1F468 1F3FF 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿซ man teacher: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFEB",
+
+    "1F469 200D 1F3EB                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿซ woman teacher",
+    "\uD83D\uDC69\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FB 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿซ woman teacher: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FC 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿซ woman teacher: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FD 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿซ woman teacher: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FE 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿซ woman teacher: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFEB",
+
+    "1F469 1F3FF 200D 1F3EB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿซ woman teacher: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFEB",
+
+    "1F468 200D 2696 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โš–๏ธ man judge",
+    "\uD83D\uDC68\u200D\u2696\uFE0F",
+
+    "1F468 200D 2696                            ; non-fully-qualified # ๐Ÿ‘จโ€โš– man judge",
+    "\uD83D\uDC68\u200D\u2696",
+
+    "1F468 1F3FB 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โš–๏ธ man judge: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2696\uFE0F",
+
+    "1F468 1F3FB 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โš– man judge: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2696",
+
+    "1F468 1F3FC 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โš–๏ธ man judge: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2696\uFE0F",
+
+    "1F468 1F3FC 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โš– man judge: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2696",
+
+    "1F468 1F3FD 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โš–๏ธ man judge: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2696\uFE0F",
+
+    "1F468 1F3FD 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โš– man judge: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2696",
+
+    "1F468 1F3FE 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โš–๏ธ man judge: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2696\uFE0F",
+
+    "1F468 1F3FE 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โš– man judge: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2696",
+
+    "1F468 1F3FF 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โš–๏ธ man judge: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2696\uFE0F",
+
+    "1F468 1F3FF 200D 2696                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โš– man judge: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2696",
+
+    "1F469 200D 2696 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โš–๏ธ woman judge",
+    "\uD83D\uDC69\u200D\u2696\uFE0F",
+
+    "1F469 200D 2696                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โš– woman judge",
+    "\uD83D\uDC69\u200D\u2696",
+
+    "1F469 1F3FB 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โš–๏ธ woman judge: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2696\uFE0F",
+
+    "1F469 1F3FB 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โš– woman judge: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2696",
+
+    "1F469 1F3FC 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โš–๏ธ woman judge: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2696\uFE0F",
+
+    "1F469 1F3FC 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โš– woman judge: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2696",
+
+    "1F469 1F3FD 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โš–๏ธ woman judge: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2696\uFE0F",
+
+    "1F469 1F3FD 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โš– woman judge: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2696",
+
+    "1F469 1F3FE 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โš–๏ธ woman judge: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2696\uFE0F",
+
+    "1F469 1F3FE 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โš– woman judge: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2696",
+
+    "1F469 1F3FF 200D 2696 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โš–๏ธ woman judge: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2696\uFE0F",
+
+    "1F469 1F3FF 200D 2696                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โš– woman judge: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2696",
+
+    "1F468 200D 1F33E                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŒพ man farmer",
+    "\uD83D\uDC68\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FB 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŒพ man farmer: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FC 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŒพ man farmer: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FD 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŒพ man farmer: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FE 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŒพ man farmer: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF3E",
+
+    "1F468 1F3FF 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŒพ man farmer: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF3E",
+
+    "1F469 200D 1F33E                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŒพ woman farmer",
+    "\uD83D\uDC69\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FB 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŒพ woman farmer: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FC 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŒพ woman farmer: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FD 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŒพ woman farmer: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FE 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŒพ woman farmer: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF3E",
+
+    "1F469 1F3FF 200D 1F33E                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŒพ woman farmer: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF3E",
+
+    "1F468 200D 1F373                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿณ man cook",
+    "\uD83D\uDC68\u200D\uD83C\uDF73",
+
+    "1F468 1F3FB 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿณ man cook: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDF73",
+
+    "1F468 1F3FC 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿณ man cook: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDF73",
+
+    "1F468 1F3FD 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿณ man cook: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDF73",
+
+    "1F468 1F3FE 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿณ man cook: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDF73",
+
+    "1F468 1F3FF 200D 1F373                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿณ man cook: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDF73",
+
+    "1F469 200D 1F373                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿณ woman cook",
+    "\uD83D\uDC69\u200D\uD83C\uDF73",
+
+    "1F469 1F3FB 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿณ woman cook: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDF73",
+
+    "1F469 1F3FC 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿณ woman cook: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDF73",
+
+    "1F469 1F3FD 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿณ woman cook: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDF73",
+
+    "1F469 1F3FE 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿณ woman cook: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDF73",
+
+    "1F469 1F3FF 200D 1F373                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿณ woman cook: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDF73",
+
+    "1F468 200D 1F527                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ”ง man mechanic",
+    "\uD83D\uDC68\u200D\uD83D\uDD27",
+
+    "1F468 1F3FB 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ”ง man mechanic: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDD27",
+
+    "1F468 1F3FC 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ”ง man mechanic: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDD27",
+
+    "1F468 1F3FD 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ”ง man mechanic: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDD27",
+
+    "1F468 1F3FE 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ”ง man mechanic: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDD27",
+
+    "1F468 1F3FF 200D 1F527                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ”ง man mechanic: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDD27",
+
+    "1F469 200D 1F527                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ”ง woman mechanic",
+    "\uD83D\uDC69\u200D\uD83D\uDD27",
+
+    "1F469 1F3FB 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ”ง woman mechanic: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDD27",
+
+    "1F469 1F3FC 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ”ง woman mechanic: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDD27",
+
+    "1F469 1F3FD 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ”ง woman mechanic: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDD27",
+
+    "1F469 1F3FE 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ”ง woman mechanic: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDD27",
+
+    "1F469 1F3FF 200D 1F527                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ”ง woman mechanic: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDD27",
+
+    "1F468 200D 1F3ED                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿญ man factory worker",
+    "\uD83D\uDC68\u200D\uD83C\uDFED",
+
+    "1F468 1F3FB 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿญ man factory worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFED",
+
+    "1F468 1F3FC 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿญ man factory worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFED",
+
+    "1F468 1F3FD 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿญ man factory worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFED",
+
+    "1F468 1F3FE 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿญ man factory worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFED",
+
+    "1F468 1F3FF 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿญ man factory worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFED",
+
+    "1F469 200D 1F3ED                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿญ woman factory worker",
+    "\uD83D\uDC69\u200D\uD83C\uDFED",
+
+    "1F469 1F3FB 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿญ woman factory worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFED",
+
+    "1F469 1F3FC 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿญ woman factory worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFED",
+
+    "1F469 1F3FD 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿญ woman factory worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFED",
+
+    "1F469 1F3FE 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿญ woman factory worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFED",
+
+    "1F469 1F3FF 200D 1F3ED                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿญ woman factory worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFED",
+
+    "1F468 200D 1F4BC                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ’ผ man office worker",
+    "\uD83D\uDC68\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FB 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ’ผ man office worker: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FC 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ’ผ man office worker: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FD 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ’ผ man office worker: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FE 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ’ผ man office worker: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDCBC",
+
+    "1F468 1F3FF 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ’ผ man office worker: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDCBC",
+
+    "1F469 200D 1F4BC                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ’ผ woman office worker",
+    "\uD83D\uDC69\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FB 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ’ผ woman office worker: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FC 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ’ผ woman office worker: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FD 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ผ woman office worker: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FE 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ’ผ woman office worker: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDCBC",
+
+    "1F469 1F3FF 200D 1F4BC                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ’ผ woman office worker: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDCBC",
+
+    "1F468 200D 1F52C                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ”ฌ man scientist",
+    "\uD83D\uDC68\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FB 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ”ฌ man scientist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FC 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ”ฌ man scientist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FD 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ”ฌ man scientist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FE 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ”ฌ man scientist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDD2C",
+
+    "1F468 1F3FF 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ”ฌ man scientist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDD2C",
+
+    "1F469 200D 1F52C                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ”ฌ woman scientist",
+    "\uD83D\uDC69\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FB 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ”ฌ woman scientist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FC 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ”ฌ woman scientist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FD 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ”ฌ woman scientist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FE 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ”ฌ woman scientist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDD2C",
+
+    "1F469 1F3FF 200D 1F52C                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ”ฌ woman scientist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDD2C",
+
+    "1F468 200D 1F4BB                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿ’ป man technologist",
+    "\uD83D\uDC68\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FB 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿ’ป man technologist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FC 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿ’ป man technologist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FD 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿ’ป man technologist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FE 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿ’ป man technologist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDCBB",
+
+    "1F468 1F3FF 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿ’ป man technologist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDCBB",
+
+    "1F469 200D 1F4BB                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿ’ป woman technologist",
+    "\uD83D\uDC69\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FB 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿ’ป woman technologist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FC 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿ’ป woman technologist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FD 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿ’ป woman technologist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FE 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿ’ป woman technologist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDCBB",
+
+    "1F469 1F3FF 200D 1F4BB                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿ’ป woman technologist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDCBB",
+
+    "1F468 200D 1F3A4                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽค man singer",
+    "\uD83D\uDC68\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FB 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽค man singer: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FC 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽค man singer: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FD 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽค man singer: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FE 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽค man singer: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFA4",
+
+    "1F468 1F3FF 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽค man singer: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFA4",
+
+    "1F469 200D 1F3A4                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽค woman singer",
+    "\uD83D\uDC69\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FB 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽค woman singer: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FC 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽค woman singer: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FD 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽค woman singer: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FE 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽค woman singer: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFA4",
+
+    "1F469 1F3FF 200D 1F3A4                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽค woman singer: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFA4",
+
+    "1F468 200D 1F3A8                           ; fully-qualified     # ๐Ÿ‘จโ€๐ŸŽจ man artist",
+    "\uD83D\uDC68\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FB 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐ŸŽจ man artist: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FC 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐ŸŽจ man artist: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FD 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐ŸŽจ man artist: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FE 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐ŸŽจ man artist: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83C\uDFA8",
+
+    "1F468 1F3FF 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐ŸŽจ man artist: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83C\uDFA8",
+
+    "1F469 200D 1F3A8                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐ŸŽจ woman artist",
+    "\uD83D\uDC69\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FB 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐ŸŽจ woman artist: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FC 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐ŸŽจ woman artist: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FD 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐ŸŽจ woman artist: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FE 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐ŸŽจ woman artist: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83C\uDFA8",
+
+    "1F469 1F3FF 200D 1F3A8                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐ŸŽจ woman artist: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83C\uDFA8",
+
+    "1F468 200D 2708 FE0F                       ; fully-qualified     # ๐Ÿ‘จโ€โœˆ๏ธ man pilot",
+    "\uD83D\uDC68\u200D\u2708\uFE0F",
+
+    "1F468 200D 2708                            ; non-fully-qualified # ๐Ÿ‘จโ€โœˆ man pilot",
+    "\uD83D\uDC68\u200D\u2708",
+
+    "1F468 1F3FB 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€โœˆ๏ธ man pilot: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2708\uFE0F",
+
+    "1F468 1F3FB 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿปโ€โœˆ man pilot: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\u2708",
+
+    "1F468 1F3FC 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€โœˆ๏ธ man pilot: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2708\uFE0F",
+
+    "1F468 1F3FC 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿผโ€โœˆ man pilot: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\u2708",
+
+    "1F468 1F3FD 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€โœˆ๏ธ man pilot: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2708\uFE0F",
+
+    "1F468 1F3FD 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฝโ€โœˆ man pilot: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\u2708",
+
+    "1F468 1F3FE 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€โœˆ๏ธ man pilot: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2708\uFE0F",
+
+    "1F468 1F3FE 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿพโ€โœˆ man pilot: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\u2708",
+
+    "1F468 1F3FF 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€โœˆ๏ธ man pilot: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2708\uFE0F",
+
+    "1F468 1F3FF 200D 2708                      ; non-fully-qualified # ๐Ÿ‘จ๐Ÿฟโ€โœˆ man pilot: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\u2708",
+
+    "1F469 200D 2708 FE0F                       ; fully-qualified     # ๐Ÿ‘ฉโ€โœˆ๏ธ woman pilot",
+    "\uD83D\uDC69\u200D\u2708\uFE0F",
+
+    "1F469 200D 2708                            ; non-fully-qualified # ๐Ÿ‘ฉโ€โœˆ woman pilot",
+    "\uD83D\uDC69\u200D\u2708",
+
+    "1F469 1F3FB 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€โœˆ๏ธ woman pilot: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2708\uFE0F",
+
+    "1F469 1F3FB 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿปโ€โœˆ woman pilot: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\u2708",
+
+    "1F469 1F3FC 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€โœˆ๏ธ woman pilot: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2708\uFE0F",
+
+    "1F469 1F3FC 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿผโ€โœˆ woman pilot: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\u2708",
+
+    "1F469 1F3FD 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€โœˆ๏ธ woman pilot: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2708\uFE0F",
+
+    "1F469 1F3FD 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฝโ€โœˆ woman pilot: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\u2708",
+
+    "1F469 1F3FE 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€โœˆ๏ธ woman pilot: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2708\uFE0F",
+
+    "1F469 1F3FE 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿพโ€โœˆ woman pilot: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\u2708",
+
+    "1F469 1F3FF 200D 2708 FE0F                 ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€โœˆ๏ธ woman pilot: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2708\uFE0F",
+
+    "1F469 1F3FF 200D 2708                      ; non-fully-qualified # ๐Ÿ‘ฉ๐Ÿฟโ€โœˆ woman pilot: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\u2708",
+
+    "1F468 200D 1F680                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿš€ man astronaut",
+    "\uD83D\uDC68\u200D\uD83D\uDE80",
+
+    "1F468 1F3FB 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿš€ man astronaut: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDE80",
+
+    "1F468 1F3FC 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿš€ man astronaut: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDE80",
+
+    "1F468 1F3FD 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿš€ man astronaut: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDE80",
+
+    "1F468 1F3FE 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿš€ man astronaut: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDE80",
+
+    "1F468 1F3FF 200D 1F680                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿš€ man astronaut: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDE80",
+
+    "1F469 200D 1F680                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿš€ woman astronaut",
+    "\uD83D\uDC69\u200D\uD83D\uDE80",
+
+    "1F469 1F3FB 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿš€ woman astronaut: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDE80",
+
+    "1F469 1F3FC 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿš€ woman astronaut: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDE80",
+
+    "1F469 1F3FD 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿš€ woman astronaut: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDE80",
+
+    "1F469 1F3FE 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿš€ woman astronaut: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDE80",
+
+    "1F469 1F3FF 200D 1F680                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿš€ woman astronaut: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDE80",
+
+    "1F468 200D 1F692                           ; fully-qualified     # ๐Ÿ‘จโ€๐Ÿš’ man firefighter",
+    "\uD83D\uDC68\u200D\uD83D\uDE92",
+
+    "1F468 1F3FB 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿปโ€๐Ÿš’ man firefighter: light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFB\u200D\uD83D\uDE92",
+
+    "1F468 1F3FC 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿผโ€๐Ÿš’ man firefighter: medium-light skin tone",
+    "\uD83D\uDC68\uD83C\uDFFC\u200D\uD83D\uDE92",
+
+    "1F468 1F3FD 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฝโ€๐Ÿš’ man firefighter: medium skin tone",
+    "\uD83D\uDC68\uD83C\uDFFD\u200D\uD83D\uDE92",
+
+    "1F468 1F3FE 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿพโ€๐Ÿš’ man firefighter: medium-dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFE\u200D\uD83D\uDE92",
+
+    "1F468 1F3FF 200D 1F692                     ; fully-qualified     # ๐Ÿ‘จ๐Ÿฟโ€๐Ÿš’ man firefighter: dark skin tone",
+    "\uD83D\uDC68\uD83C\uDFFF\u200D\uD83D\uDE92",
+
+    "1F469 200D 1F692                           ; fully-qualified     # ๐Ÿ‘ฉโ€๐Ÿš’ woman firefighter",
+    "\uD83D\uDC69\u200D\uD83D\uDE92",
+
+    "1F469 1F3FB 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿปโ€๐Ÿš’ woman firefighter: light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFB\u200D\uD83D\uDE92",
+
+    "1F469 1F3FC 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿผโ€๐Ÿš’ woman firefighter: medium-light skin tone",
+    "\uD83D\uDC69\uD83C\uDFFC\u200D\uD83D\uDE92",
+
+    "1F469 1F3FD 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฝโ€๐Ÿš’ woman firefighter: medium skin tone",
+    "\uD83D\uDC69\uD83C\uDFFD\u200D\uD83D\uDE92",
+
+    "1F469 1F3FE 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿพโ€๐Ÿš’ woman firefighter: medium-dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFE\u200D\uD83D\uDE92",
+
+    "1F469 1F3FF 200D 1F692                     ; fully-qualified     # ๐Ÿ‘ฉ๐Ÿฟโ€๐Ÿš’ woman firefighter: dark skin tone",
+    "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDE92",
+
+    "1F46E                                      ; fully-qualified     # ๐Ÿ‘ฎ police officer",
+    "\uD83D\uDC6E",
+
+    "1F46E 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿป police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB",
+
+    "1F46E 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผ police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC",
+
+    "1F46E 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝ police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD",
+
+    "1F46E 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพ police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE",
+
+    "1F46E 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟ police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF",
+
+    "1F46E 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ‘ฎโ€โ™‚๏ธ man police officer",
+    "\uD83D\uDC6E\u200D\u2642\uFE0F",
+
+    "1F46E 200D 2642                            ; non-fully-qualified # ๐Ÿ‘ฎโ€โ™‚ man police officer",
+    "\uD83D\uDC6E\u200D\u2642",
+
+    "1F46E 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿปโ€โ™‚๏ธ man police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿปโ€โ™‚ man police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2642",
+
+    "1F46E 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผโ€โ™‚๏ธ man police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿผโ€โ™‚ man police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2642",
+
+    "1F46E 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝโ€โ™‚๏ธ man police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฝโ€โ™‚ man police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2642",
+
+    "1F46E 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพโ€โ™‚๏ธ man police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿพโ€โ™‚ man police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2642",
+
+    "1F46E 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟโ€โ™‚๏ธ man police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F46E 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฟโ€โ™‚ man police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2642",
+
+    "1F46E 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ‘ฎโ€โ™€๏ธ woman police officer",
+    "\uD83D\uDC6E\u200D\u2640\uFE0F",
+
+    "1F46E 200D 2640                            ; non-fully-qualified # ๐Ÿ‘ฎโ€โ™€ woman police officer",
+    "\uD83D\uDC6E\u200D\u2640",
+
+    "1F46E 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿปโ€โ™€๏ธ woman police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿปโ€โ™€ woman police officer: light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFB\u200D\u2640",
+
+    "1F46E 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿผโ€โ™€๏ธ woman police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿผโ€โ™€ woman police officer: medium-light skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFC\u200D\u2640",
+
+    "1F46E 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฝโ€โ™€๏ธ woman police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฝโ€โ™€ woman police officer: medium skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFD\u200D\u2640",
+
+    "1F46E 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿพโ€โ™€๏ธ woman police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿพโ€โ™€ woman police officer: medium-dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFE\u200D\u2640",
+
+    "1F46E 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ woman police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F46E 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ฎ๐Ÿฟโ€โ™€ woman police officer: dark skin tone",
+    "\uD83D\uDC6E\uD83C\uDFFF\u200D\u2640",
+
+    "1F575 FE0F                                 ; fully-qualified     # ๐Ÿ•ต๏ธ detective",
+    "\uD83D\uDD75\uFE0F",
+
+    "1F575                                      ; non-fully-qualified # ๐Ÿ•ต detective",
+    "\uD83D\uDD75",
+
+    "1F575 1F3FB                                ; fully-qualified     # ๐Ÿ•ต๐Ÿป detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB",
+
+    "1F575 1F3FC                                ; fully-qualified     # ๐Ÿ•ต๐Ÿผ detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC",
+
+    "1F575 1F3FD                                ; fully-qualified     # ๐Ÿ•ต๐Ÿฝ detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD",
+
+    "1F575 1F3FE                                ; fully-qualified     # ๐Ÿ•ต๐Ÿพ detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE",
+
+    "1F575 1F3FF                                ; fully-qualified     # ๐Ÿ•ต๐Ÿฟ detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF",
+
+    "1F575 FE0F 200D 2642 FE0F                  ; fully-qualified     # ๐Ÿ•ต๏ธโ€โ™‚๏ธ man detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2642\uFE0F",
+
+    "1F575 200D 2642 FE0F                       ; non-fully-qualified # ๐Ÿ•ตโ€โ™‚๏ธ man detective",
+    "\uD83D\uDD75\u200D\u2642\uFE0F",
+
+    "1F575 FE0F 200D 2642                       ; non-fully-qualified # ๐Ÿ•ต๏ธโ€โ™‚ man detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2642",
+
+    "1F575 200D 2642                            ; non-fully-qualified # ๐Ÿ•ตโ€โ™‚ man detective",
+    "\uD83D\uDD75\u200D\u2642",
+
+    "1F575 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿปโ€โ™‚๏ธ man detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F575 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿปโ€โ™‚ man detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2642",
+
+    "1F575 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿผโ€โ™‚๏ธ man detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F575 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿผโ€โ™‚ man detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2642",
+
+    "1F575 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฝโ€โ™‚๏ธ man detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F575 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฝโ€โ™‚ man detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2642",
+
+    "1F575 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿพโ€โ™‚๏ธ man detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F575 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿพโ€โ™‚ man detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2642",
+
+    "1F575 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฟโ€โ™‚๏ธ man detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F575 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฟโ€โ™‚ man detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2642",
+
+    "1F575 FE0F 200D 2640 FE0F                  ; fully-qualified     # ๐Ÿ•ต๏ธโ€โ™€๏ธ woman detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2640\uFE0F",
+
+    "1F575 200D 2640 FE0F                       ; non-fully-qualified # ๐Ÿ•ตโ€โ™€๏ธ woman detective",
+    "\uD83D\uDD75\u200D\u2640\uFE0F",
+
+    "1F575 FE0F 200D 2640                       ; non-fully-qualified # ๐Ÿ•ต๏ธโ€โ™€ woman detective",
+    "\uD83D\uDD75\uFE0F\u200D\u2640",
+
+    "1F575 200D 2640                            ; non-fully-qualified # ๐Ÿ•ตโ€โ™€ woman detective",
+    "\uD83D\uDD75\u200D\u2640",
+
+    "1F575 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿปโ€โ™€๏ธ woman detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F575 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿปโ€โ™€ woman detective: light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFB\u200D\u2640",
+
+    "1F575 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿผโ€โ™€๏ธ woman detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F575 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿผโ€โ™€ woman detective: medium-light skin tone",
+    "\uD83D\uDD75\uD83C\uDFFC\u200D\u2640",
+
+    "1F575 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฝโ€โ™€๏ธ woman detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F575 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฝโ€โ™€ woman detective: medium skin tone",
+    "\uD83D\uDD75\uD83C\uDFFD\u200D\u2640",
+
+    "1F575 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿพโ€โ™€๏ธ woman detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F575 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿพโ€โ™€ woman detective: medium-dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFE\u200D\u2640",
+
+    "1F575 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ•ต๐Ÿฟโ€โ™€๏ธ woman detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F575 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ•ต๐Ÿฟโ€โ™€ woman detective: dark skin tone",
+    "\uD83D\uDD75\uD83C\uDFFF\u200D\u2640",
+
+    "1F482                                      ; fully-qualified     # ๐Ÿ’‚ guard",
+    "\uD83D\uDC82",
+
+    "1F482 1F3FB                                ; fully-qualified     # ๐Ÿ’‚๐Ÿป guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB",
+
+    "1F482 1F3FC                                ; fully-qualified     # ๐Ÿ’‚๐Ÿผ guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC",
+
+    "1F482 1F3FD                                ; fully-qualified     # ๐Ÿ’‚๐Ÿฝ guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD",
+
+    "1F482 1F3FE                                ; fully-qualified     # ๐Ÿ’‚๐Ÿพ guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE",
+
+    "1F482 1F3FF                                ; fully-qualified     # ๐Ÿ’‚๐Ÿฟ guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF",
+
+    "1F482 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ’‚โ€โ™‚๏ธ man guard",
+    "\uD83D\uDC82\u200D\u2642\uFE0F",
+
+    "1F482 200D 2642                            ; non-fully-qualified # ๐Ÿ’‚โ€โ™‚ man guard",
+    "\uD83D\uDC82\u200D\u2642",
+
+    "1F482 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿปโ€โ™‚๏ธ man guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F482 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿปโ€โ™‚ man guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2642",
+
+    "1F482 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿผโ€โ™‚๏ธ man guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F482 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿผโ€โ™‚ man guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2642",
+
+    "1F482 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฝโ€โ™‚๏ธ man guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F482 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฝโ€โ™‚ man guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2642",
+
+    "1F482 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿพโ€โ™‚๏ธ man guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F482 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿพโ€โ™‚ man guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2642",
+
+    "1F482 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฟโ€โ™‚๏ธ man guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F482 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฟโ€โ™‚ man guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2642",
+
+    "1F482 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ’‚โ€โ™€๏ธ woman guard",
+    "\uD83D\uDC82\u200D\u2640\uFE0F",
+
+    "1F482 200D 2640                            ; non-fully-qualified # ๐Ÿ’‚โ€โ™€ woman guard",
+    "\uD83D\uDC82\u200D\u2640",
+
+    "1F482 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿปโ€โ™€๏ธ woman guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F482 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿปโ€โ™€ woman guard: light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFB\u200D\u2640",
+
+    "1F482 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿผโ€โ™€๏ธ woman guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F482 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿผโ€โ™€ woman guard: medium-light skin tone",
+    "\uD83D\uDC82\uD83C\uDFFC\u200D\u2640",
+
+    "1F482 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฝโ€โ™€๏ธ woman guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F482 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฝโ€โ™€ woman guard: medium skin tone",
+    "\uD83D\uDC82\uD83C\uDFFD\u200D\u2640",
+
+    "1F482 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿพโ€โ™€๏ธ woman guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F482 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿพโ€โ™€ woman guard: medium-dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFE\u200D\u2640",
+
+    "1F482 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ’‚๐Ÿฟโ€โ™€๏ธ woman guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F482 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ’‚๐Ÿฟโ€โ™€ woman guard: dark skin tone",
+    "\uD83D\uDC82\uD83C\uDFFF\u200D\u2640",
+
+    "1F477                                      ; fully-qualified     # ๐Ÿ‘ท construction worker",
+    "\uD83D\uDC77",
+
+    "1F477 1F3FB                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿป construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB",
+
+    "1F477 1F3FC                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿผ construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC",
+
+    "1F477 1F3FD                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝ construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD",
+
+    "1F477 1F3FE                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿพ construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE",
+
+    "1F477 1F3FF                                ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟ construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF",
+
+    "1F477 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ‘ทโ€โ™‚๏ธ man construction worker",
+    "\uD83D\uDC77\u200D\u2642\uFE0F",
+
+    "1F477 200D 2642                            ; non-fully-qualified # ๐Ÿ‘ทโ€โ™‚ man construction worker",
+    "\uD83D\uDC77\u200D\u2642",
+
+    "1F477 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿปโ€โ™‚๏ธ man construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F477 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿปโ€โ™‚ man construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2642",
+
+    "1F477 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿผโ€โ™‚๏ธ man construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F477 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿผโ€โ™‚ man construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2642",
+
+    "1F477 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝโ€โ™‚๏ธ man construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F477 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฝโ€โ™‚ man construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2642",
+
+    "1F477 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿพโ€โ™‚๏ธ man construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F477 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿพโ€โ™‚ man construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2642",
+
+    "1F477 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟโ€โ™‚๏ธ man construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F477 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฟโ€โ™‚ man construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2642",
+
+    "1F477 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ‘ทโ€โ™€๏ธ woman construction worker",
+    "\uD83D\uDC77\u200D\u2640\uFE0F",
+
+    "1F477 200D 2640                            ; non-fully-qualified # ๐Ÿ‘ทโ€โ™€ woman construction worker",
+    "\uD83D\uDC77\u200D\u2640",
+
+    "1F477 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿปโ€โ™€๏ธ woman construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F477 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿปโ€โ™€ woman construction worker: light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFB\u200D\u2640",
+
+    "1F477 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿผโ€โ™€๏ธ woman construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F477 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿผโ€โ™€ woman construction worker: medium-light skin tone",
+    "\uD83D\uDC77\uD83C\uDFFC\u200D\u2640",
+
+    "1F477 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฝโ€โ™€๏ธ woman construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F477 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฝโ€โ™€ woman construction worker: medium skin tone",
+    "\uD83D\uDC77\uD83C\uDFFD\u200D\u2640",
+
+    "1F477 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿพโ€โ™€๏ธ woman construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F477 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿพโ€โ™€ woman construction worker: medium-dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFE\u200D\u2640",
+
+    "1F477 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ท๐Ÿฟโ€โ™€๏ธ woman construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F477 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ท๐Ÿฟโ€โ™€ woman construction worker: dark skin tone",
+    "\uD83D\uDC77\uD83C\uDFFF\u200D\u2640",
+
+    "1F934                                      ; fully-qualified     # ๐Ÿคด prince",
+    "\uD83E\uDD34",
+
+    "1F934 1F3FB                                ; fully-qualified     # ๐Ÿคด๐Ÿป prince: light skin tone",
+    "\uD83E\uDD34\uD83C\uDFFB",
+
+    "1F934 1F3FC                                ; fully-qualified     # ๐Ÿคด๐Ÿผ prince: medium-light skin tone",
+    "\uD83E\uDD34\uD83C\uDFFC",
+
+    "1F934 1F3FD                                ; fully-qualified     # ๐Ÿคด๐Ÿฝ prince: medium skin tone",
+    "\uD83E\uDD34\uD83C\uDFFD",
+
+    "1F934 1F3FE                                ; fully-qualified     # ๐Ÿคด๐Ÿพ prince: medium-dark skin tone",
+    "\uD83E\uDD34\uD83C\uDFFE",
+
+    "1F934 1F3FF                                ; fully-qualified     # ๐Ÿคด๐Ÿฟ prince: dark skin tone",
+    "\uD83E\uDD34\uD83C\uDFFF",
+
+    "1F478                                      ; fully-qualified     # ๐Ÿ‘ธ princess",
+    "\uD83D\uDC78",
+
+    "1F478 1F3FB                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿป princess: light skin tone",
+    "\uD83D\uDC78\uD83C\uDFFB",
+
+    "1F478 1F3FC                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿผ princess: medium-light skin tone",
+    "\uD83D\uDC78\uD83C\uDFFC",
+
+    "1F478 1F3FD                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿฝ princess: medium skin tone",
+    "\uD83D\uDC78\uD83C\uDFFD",
+
+    "1F478 1F3FE                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿพ princess: medium-dark skin tone",
+    "\uD83D\uDC78\uD83C\uDFFE",
+
+    "1F478 1F3FF                                ; fully-qualified     # ๐Ÿ‘ธ๐Ÿฟ princess: dark skin tone",
+    "\uD83D\uDC78\uD83C\uDFFF",
+
+    "1F473                                      ; fully-qualified     # ๐Ÿ‘ณ person wearing turban",
+    "\uD83D\uDC73",
+
+    "1F473 1F3FB                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿป person wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB",
+
+    "1F473 1F3FC                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿผ person wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC",
+
+    "1F473 1F3FD                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฝ person wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD",
+
+    "1F473 1F3FE                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿพ person wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE",
+
+    "1F473 1F3FF                                ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฟ person wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF",
+
+    "1F473 200D 2642 FE0F                       ; fully-qualified     # ๐Ÿ‘ณโ€โ™‚๏ธ man wearing turban",
+    "\uD83D\uDC73\u200D\u2642\uFE0F",
+
+    "1F473 200D 2642                            ; non-fully-qualified # ๐Ÿ‘ณโ€โ™‚ man wearing turban",
+    "\uD83D\uDC73\u200D\u2642",
+
+    "1F473 1F3FB 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿปโ€โ™‚๏ธ man wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2642\uFE0F",
+
+    "1F473 1F3FB 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿปโ€โ™‚ man wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2642",
+
+    "1F473 1F3FC 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿผโ€โ™‚๏ธ man wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2642\uFE0F",
+
+    "1F473 1F3FC 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿผโ€โ™‚ man wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2642",
+
+    "1F473 1F3FD 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฝโ€โ™‚๏ธ man wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2642\uFE0F",
+
+    "1F473 1F3FD 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿฝโ€โ™‚ man wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2642",
+
+    "1F473 1F3FE 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿพโ€โ™‚๏ธ man wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2642\uFE0F",
+
+    "1F473 1F3FE 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿพโ€โ™‚ man wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2642",
+
+    "1F473 1F3FF 200D 2642 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฟโ€โ™‚๏ธ man wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2642\uFE0F",
+
+    "1F473 1F3FF 200D 2642                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿฟโ€โ™‚ man wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2642",
+
+    "1F473 200D 2640 FE0F                       ; fully-qualified     # ๐Ÿ‘ณโ€โ™€๏ธ woman wearing turban",
+    "\uD83D\uDC73\u200D\u2640\uFE0F",
+
+    "1F473 200D 2640                            ; non-fully-qualified # ๐Ÿ‘ณโ€โ™€ woman wearing turban",
+    "\uD83D\uDC73\u200D\u2640",
+
+    "1F473 1F3FB 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿปโ€โ™€๏ธ woman wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2640\uFE0F",
+
+    "1F473 1F3FB 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿปโ€โ™€ woman wearing turban: light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFB\u200D\u2640",
+
+    "1F473 1F3FC 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿผโ€โ™€๏ธ woman wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2640\uFE0F",
+
+    "1F473 1F3FC 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿผโ€โ™€ woman wearing turban: medium-light skin tone",
+    "\uD83D\uDC73\uD83C\uDFFC\u200D\u2640",
+
+    "1F473 1F3FD 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฝโ€โ™€๏ธ woman wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2640\uFE0F",
+
+    "1F473 1F3FD 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿฝโ€โ™€ woman wearing turban: medium skin tone",
+    "\uD83D\uDC73\uD83C\uDFFD\u200D\u2640",
+
+    "1F473 1F3FE 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿพโ€โ™€๏ธ woman wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2640\uFE0F",
+
+    "1F473 1F3FE 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿพโ€โ™€ woman wearing turban: medium-dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFE\u200D\u2640",
+
+    "1F473 1F3FF 200D 2640 FE0F                 ; fully-qualified     # ๐Ÿ‘ณ๐Ÿฟโ€โ™€๏ธ woman wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2640\uFE0F",
+
+    "1F473 1F3FF 200D 2640                      ; non-fully-qualified # ๐Ÿ‘ณ๐Ÿฟโ€โ™€ woman wearing turban: dark skin tone",
+    "\uD83D\uDC73\uD83C\uDFFF\u200D\u2640",
+
+    "1F472                                      ; fully-qualified     # ๐Ÿ‘ฒ man with Chinese cap",
+    "\uD83D\uDC72",
+
+    "1F472 1F3FB                                ; fully-qualified     # ๐Ÿ‘ฒ๐Ÿป man with Chinese cap: light skin tone",
+    "\uD83D\uDC72\uD83C\uDFFB",
+
+    "1F472 1F3FC                                ; fully-qualified     # ๐Ÿ‘ฒ๐Ÿผ man with Chinese cap: medium-light skin tone",
+    "\uD83D\uDC72\uD83C\uDFFC",
+
+    "1F472 1F3FD                                ; fully-qualified     # ๐Ÿ‘ฒ๐Ÿฝ man with Chinese cap: medium skin tone",
+    "\uD83D\uDC72\uD83C\uDFFD",
+
+    "1F472 1F3FE                                ; fully-qualified     # ๐Ÿ‘ฒ๐Ÿพ man with Chinese cap: medium-dark skin tone",
+    "\uD83D\uDC72\uD83C\uDFFE",
+
+    "1F472 1F3FF                                ; fully-qualified     # ๐Ÿ‘ฒ๐Ÿฟ man with Chinese cap: dark skin tone",
+    "\uD83D\uDC72\uD83C\uDFFF",
+
+    "1F9D5                                      ; fully-qualified     # ๐Ÿง• woman with headscarf",
+    "\uD83E\uDDD5",
+
+    "1F9D5 1F3FB                                ; fully-qualified     # ๐Ÿง•๐Ÿป woman with headscarf: light skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFB",
+
+    "1F9D5 1F3FC                                ; fully-qualified     # ๐Ÿง•๐Ÿผ woman with headscarf: medium-light skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFC",
+
+    "1F9D5 1F3FD                                ; fully-qualified     # ๐Ÿง•๐Ÿฝ woman with headscarf: medium skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFD",
+
+    "1F9D5 1F3FE                                ; fully-qualified     # ๐Ÿง•๐Ÿพ woman with headscarf: medium-dark skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFE",
+
+    "1F9D5 1F3FF                                ; fully-qualified     # ๐Ÿง•๐Ÿฟ woman with headscarf: dark skin tone",
+    "\uD83E\uDDD5\uD83C\uDFFF",
+
+    "1F9D4                                      ; fully-qualified     # ๐Ÿง” bearded person",
+    "\uD83E\uDDD4",
+
+    "1F9D4 1F3FB                                ; fully-qualified     # ๐Ÿง”๐Ÿป bearded person: light skin t

<TRUNCATED>

[08/24] lucene-solr:master: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/283b19a8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/283b19a8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/283b19a8

Branch: refs/heads/master
Commit: 283b19a8da6ab9e0b7e9a75b132d3067218d5502
Parents: 7db4121
Author: Steve Rowe <sa...@apache.org>
Authored: Tue Jan 8 13:33:49 2019 -0500
Committer: Steve Rowe <sa...@apache.org>
Committed: Tue Jan 8 13:33:49 2019 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |     8 +
 lucene/analysis/common/build.xml                |    32 +-
 .../charfilter/HTMLStripCharFilter.java         |   834 +-
 .../charfilter/HTMLStripCharFilter.jflex        |    22 +-
 .../analysis/standard/ClassicTokenizerImpl.java |   141 +-
 .../standard/UAX29URLEmailTokenizer.java        |    44 +-
 .../standard/UAX29URLEmailTokenizerImpl.java    | 74906 +++++++++--------
 .../standard/UAX29URLEmailTokenizerImpl.jflex   |   216 +-
 .../wikipedia/WikipediaTokenizerImpl.java       |   465 +-
 .../charfilter/HTMLStripCharFilterTest.java     |     2 +-
 .../standard/TestUAX29URLEmailAnalyzer.java     |     4 +-
 .../standard/TestUAX29URLEmailTokenizer.java    |    76 +-
 lucene/common-build.xml                         |    21 +-
 .../src/data/jflex/UnicodeEmojiProperties.jflex |    25 +
 .../src/data/jflex/getUnicodeEmojiProperties.pl |   168 +
 lucene/core/src/data/jflex/skeleton.default     |   342 +
 .../jflex/skeleton.disable.buffer.expansion.txt |   348 +
 .../standard/StandardTokenizerImpl.java         |   637 +-
 .../standard/StandardTokenizerImpl.jflex        |   206 +-
 .../analysis/standard/TestStandardAnalyzer.java |   131 +-
 .../EmojiTokenizationTestUnicode_11_0.java      | 10756 +++
 .../standard/WordBreakTestUnicode_6_3_0.java    |  5537 --
 .../standard/WordBreakTestUnicode_9_0_0.java    |  8276 ++
 .../standard/generateEmojiTokenizationTest.pl   |   150 +
 .../generateJavaUnicodeWordBreakTest.pl         |    41 +-
 25 files changed, 62395 insertions(+), 40993 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7169cf6..3b98955 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -241,6 +241,11 @@ Optimizations
 
 ======================= Lucene 7.7.0 =======================
 
+Changes in Runtime Behavior
+
+* LUCENE-8527: StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0,
+  and provide Unicode UTS#51 v11.0 Emoji tokenization with the "<EMOJI>" token type. 
+
 Build
 
 * LUCENE-8611: Update randomizedtesting to 2.7.2, JUnit to 4.12, add hamcrest-core 
@@ -293,6 +298,9 @@ Improvements
 
 * LUCENE-8581: Change LatLonShape encoding to use 4 bytes Per Dimension.
   (Ignacio Vera, Nick Knize, Adrien Grand)
+  
+* LUCENE-8527: Upgrade JFlex dependency to 1.7.0; in StandardTokenizer and UAX29URLEmailTokenizer,
+  increase supported Unicode version from 6.3 to 9.0, and support Unicode UTS#51 v11.0 Emoji tokenization.
 
 Optimizations
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/build.xml b/lucene/analysis/common/build.xml
index b8eb37a..f752ecc 100644
--- a/lucene/analysis/common/build.xml
+++ b/lucene/analysis/common/build.xml
@@ -33,18 +33,14 @@
 
   <property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
 
-  <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-ClassicAnalyzer,-jflex-UAX29URLEmailTokenizer,
-                                -jflex-wiki-tokenizer,-jflex-HTMLStripCharFilter"/>
-
-  <target name="-jflex-HTMLStripCharFilter"
-          depends="init,generate-jflex-html-char-entities">
-    <jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex"
-           outdir="src/java/org/apache/lucene/analysis/charfilter"
-           nobak="on" inputstreamctor="false"/>
-    <!-- Remove the inappropriate JFlex-generated constructor -->
-    <replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
-                   match="/\*\*\s*\*\s*Creates a new scanner\s*\*\s*\*\s*@param\s*in\s*the java.io.Reader to read input from\.\s*\*/\s*public HTMLStripCharFilter\(java\.io\.Reader in\)\s*\{\s*this.zzReader = in;\s*\}"
-                   replace="" flags="s"/>
+  <!-- Because of a bug in JFlex's ant task, HTMLStripCharFilter has to be generated last.   -->
+  <!-- Otherwise the "%apiprivate" option used in its specification will leak into following -->
+  <!-- ant task invocations.                                                                 -->
+  <target name="jflex" depends="init,clean-jflex,-jflex-wiki-tokenizer,-jflex-ClassicAnalyzer,
+                                -jflex-UAX29URLEmailTokenizer,-jflex-HTMLStripCharFilter"/>
+
+  <target name="-jflex-HTMLStripCharFilter" depends="-install-jflex,generate-jflex-html-char-entities">
+    <run-jflex dir="src/java/org/apache/lucene/analysis/charfilter" name="HTMLStripCharFilter"/>
   </target>
 
   <target name="generate-jflex-html-char-entities">
@@ -58,17 +54,17 @@
     <fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>
   </target>
 
-  <target name="-jflex-wiki-tokenizer" depends="init,-install-jflex">
+  <target name="-jflex-wiki-tokenizer" depends="-install-jflex">
     <run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-UAX29URLEmailTokenizer" depends="init,-install-jflex">
-    <run-jflex-and-disable-buffer-expansion
-        dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
+  <target name="-jflex-ClassicAnalyzer" depends="-install-jflex">
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-ClassicAnalyzer" depends="init,-install-jflex">
-    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
+  <target name="-jflex-UAX29URLEmailTokenizer" depends="-install-jflex">
+    <run-jflex-and-disable-buffer-expansion
+        dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
   </target>
 
   <target name="clean-jflex">

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index a236497..ae67bde 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -95,127 +95,152 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     "\32\0\1\41\11\0\1\1\12\0\1\1\1\0\1\2\2\0\1\1"+
     "\5\0\27\1\1\0\37\1\1\0\u01ca\1\4\0\14\1\16\0\5\1"+
     "\7\0\1\1\1\0\1\1\21\0\160\2\5\1\1\0\2\1\2\0"+
-    "\4\1\10\0\1\1\1\2\3\1\1\0\1\1\1\0\24\1\1\0"+
-    "\123\1\1\0\213\1\1\0\5\2\2\0\236\1\11\0\46\1\2\0"+
-    "\1\1\7\0\47\1\11\0\55\2\1\0\1\2\1\0\2\2\1\0"+
-    "\2\2\1\0\1\2\10\0\33\1\5\0\3\1\35\0\13\2\5\0"+
-    "\53\1\25\2\12\111\4\0\2\1\1\2\143\1\1\0\1\1\7\2"+
-    "\2\0\6\2\2\1\2\2\1\0\4\2\2\1\12\111\3\1\2\0"+
-    "\1\1\20\0\1\1\1\2\36\1\33\2\2\0\131\1\13\2\1\1"+
-    "\16\0\12\111\41\1\11\2\2\1\4\0\1\1\5\0\26\1\4\2"+
-    "\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\104\0"+
-    "\1\1\1\0\13\1\67\0\33\2\1\0\4\2\66\1\3\2\1\1"+
-    "\22\2\1\1\7\2\12\1\2\2\2\0\12\111\1\0\7\1\1\0"+
-    "\7\1\1\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+
-    "\7\1\1\0\1\1\3\0\4\1\2\0\1\2\1\1\7\2\2\0"+
-    "\2\2\2\0\3\2\1\1\10\0\1\2\4\0\2\1\1\0\3\1"+
-    "\2\2\2\0\12\111\2\1\17\0\3\2\1\0\6\1\4\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\2\1\1\0\2\1"+
-    "\2\0\1\2\1\0\5\2\4\0\2\2\2\0\3\2\3\0\1\2"+
-    "\7\0\4\1\1\0\1\1\7\0\12\111\2\2\3\1\1\2\13\0"+
-    "\3\2\1\0\11\1\1\0\3\1\1\0\26\1\1\0\7\1\1\0"+
-    "\2\1\1\0\5\1\2\0\1\2\1\1\10\2\1\0\3\2\1\0"+
-    "\3\2\2\0\1\1\17\0\2\1\2\2\2\0\12\111\21\0\3\2"+
-    "\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1"+
-    "\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2\2\0\3\2"+
-    "\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0\12\111\1\0"+
-    "\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1\1\0\4\1"+
-    "\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1\3\0\3\1"+
-    "\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2\2\0\1\1"+
-    "\6\0\1\2\16\0\12\111\21\0\3\2\1\0\10\1\1\0\3\1"+
-    "\1\0\27\1\1\0\12\1\1\0\5\1\3\0\1\1\7\2\1\0"+
-    "\3\2\1\0\4\2\7\0\2\2\1\0\2\1\6\0\2\1\2\2"+
-    "\2\0\12\111\22\0\2\2\1\0\10\1\1\0\3\1\1\0\27\1"+
+    "\4\1\1\0\1\1\6\0\1\1\1\2\3\1\1\0\1\1\1\0"+
+    "\24\1\1\0\123\1\1\0\213\1\1\0\5\2\2\0\246\1\1\0"+
+    "\46\1\2\0\1\1\7\0\47\1\11\0\55\2\1\0\1\2\1\0"+
+    "\2\2\1\0\2\2\1\0\1\2\10\0\33\1\5\0\3\1\35\0"+
+    "\13\2\5\0\53\1\25\2\12\111\4\0\2\1\1\2\143\1\1\0"+
+    "\1\1\7\2\2\0\6\2\2\1\2\2\1\0\4\2\2\1\12\111"+
+    "\3\1\2\0\1\1\20\0\1\1\1\2\36\1\33\2\2\0\131\1"+
+    "\13\2\1\1\16\0\12\111\41\1\11\2\2\1\4\0\1\1\5\0"+
+    "\26\1\4\2\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1"+
+    "\3\2\104\0\25\1\1\0\10\1\26\0\16\2\1\0\41\2\66\1"+
+    "\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0\12\111\1\0"+
+    "\20\1\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
+    "\1\0\1\1\3\0\4\1\2\0\1\2\1\1\7\2\2\0\2\2"+
+    "\2\0\3\2\1\1\10\0\1\2\4\0\2\1\1\0\3\1\2\2"+
+    "\2\0\12\111\2\1\17\0\3\2\1\0\6\1\4\0\2\1\2\0"+
+    "\26\1\1\0\7\1\1\0\2\1\1\0\2\1\1\0\2\1\2\0"+
+    "\1\2\1\0\5\2\4\0\2\2\2\0\3\2\3\0\1\2\7\0"+
+    "\4\1\1\0\1\1\7\0\12\111\2\2\3\1\1\2\13\0\3\2"+
+    "\1\0\11\1\1\0\3\1\1\0\26\1\1\0\7\1\1\0\2\1"+
+    "\1\0\5\1\2\0\1\2\1\1\10\2\1\0\3\2\1\0\3\2"+
+    "\2\0\1\1\17\0\2\1\2\2\2\0\12\111\11\0\1\1\7\0"+
+    "\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0"+
+    "\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2\2\0"+
+    "\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0\12\111"+
+    "\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1\1\0"+
+    "\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1\3\0"+
+    "\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2\2\0"+
+    "\1\1\6\0\1\2\16\0\12\111\20\0\4\2\1\0\10\1\1\0"+
+    "\3\1\1\0\27\1\1\0\20\1\3\0\1\1\7\2\1\0\3\2"+
+    "\1\0\4\2\7\0\2\2\1\0\3\1\5\0\2\1\2\2\2\0"+
+    "\12\111\20\0\1\1\3\2\1\0\10\1\1\0\3\1\1\0\27\1"+
     "\1\0\12\1\1\0\5\1\2\0\1\2\1\1\7\2\1\0\3\2"+
     "\1\0\4\2\7\0\2\2\7\0\1\1\1\0\2\1\2\2\2\0"+
-    "\12\111\1\0\2\1\17\0\2\2\1\0\10\1\1\0\3\1\1\0"+
-    "\51\1\2\0\1\1\7\2\1\0\3\2\1\0\4\2\1\1\10\0"+
-    "\1\2\10\0\2\1\2\2\2\0\12\111\12\0\6\1\2\0\2\2"+
-    "\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0\7\1"+
-    "\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\22\0\2\2"+
-    "\15\0\60\1\1\2\2\1\7\2\5\0\7\1\10\2\1\0\12\111"+
-    "\47\0\2\1\1\0\1\1\2\0\2\1\1\0\1\1\2\0\1\1"+
-    "\6\0\4\1\1\0\7\1\1\0\3\1\1\0\1\1\1\0\1\1"+
-    "\2\0\2\1\1\0\4\1\1\2\2\1\6\2\1\0\2\2\1\1"+
-    "\2\0\5\1\1\0\1\1\1\0\6\2\2\0\12\111\2\0\4\1"+
-    "\40\0\1\1\27\0\2\2\6\0\12\111\13\0\1\2\1\0\1\2"+
-    "\1\0\1\2\4\0\2\2\10\1\1\0\44\1\4\0\24\2\1\0"+
-    "\2\2\5\1\13\2\1\0\44\2\11\0\1\2\71\0\53\1\24\2"+
-    "\1\1\12\111\6\0\6\1\4\2\4\1\3\2\1\1\3\2\2\1"+
-    "\7\2\3\1\4\2\15\1\14\2\1\1\1\2\12\111\4\2\2\0"+
-    "\46\1\1\0\1\1\5\0\1\1\2\0\53\1\1\0\u014d\1\1\0"+
-    "\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0"+
-    "\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0"+
-    "\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0"+
-    "\3\2\11\0\11\2\16\0\20\1\20\0\125\1\14\0\u026c\1\2\0"+
-    "\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+
-    "\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+
-    "\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+
-    "\1\1\1\2\2\0\12\111\41\0\3\2\2\0\12\111\6\0\130\1"+
-    "\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+
-    "\4\0\14\2\12\0\12\111\36\1\2\0\5\1\13\0\54\1\4\0"+
-    "\21\2\7\1\2\2\6\0\12\111\1\2\45\0\27\1\5\2\4\0"+
-    "\65\1\12\2\1\0\35\2\2\0\1\2\12\111\6\0\12\111\15\0"+
-    "\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\111\21\0\11\2"+
-    "\14\0\3\2\36\1\15\2\2\1\12\111\54\1\16\2\14\0\44\1"+
-    "\24\2\10\0\12\111\3\0\3\1\12\111\44\1\122\0\3\2\1\0"+
-    "\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2\25\0"+
-    "\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1"+
-    "\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1"+
-    "\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1"+
-    "\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41"+
-    "\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41"+
-    "\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2"+
-    "\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1"+
-    "\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1"+
-    "\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1"+
-    "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1"+
-    "\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2"+
-    "\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0"+
-    "\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1"+
-    "\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0"+
-    "\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0"+
-    "\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\111"+
-    "\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0\1\2"+
-    "\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
-    "\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2\27\1"+
-    "\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\111\6\0"+
-    "\22\2\6\1\3\0\1\1\4\0\12\111\34\1\10\2\2\0\27\1"+
-    "\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\111"+
-    "\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\111"+
-    "\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1\3\2"+
-    "\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1\2\0"+
-    "\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1\2\0"+
-    "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+
-    "\2\0\12\111\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u016e\1"+
-    "\2\0\152\1\46\0\7\1\14\0\5\1\5\0\1\1\1\2\12\1"+
-    "\1\0\15\1\1\0\5\1\1\0\1\1\1\0\2\1\1\0\2\1"+
-    "\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1"+
-    "\4\0\20\2\20\0\7\2\14\0\2\2\30\0\3\2\40\0\5\1"+
-    "\1\0\207\1\23\0\12\111\7\0\32\1\4\0\1\2\1\0\32\1"+
-    "\13\0\131\1\3\0\6\1\2\0\6\1\2\0\6\1\2\0\3\1"+
-    "\43\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1\1\0\17\1"+
-    "\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\2\202\0\35\1"+
-    "\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1\2\0\44\1"+
-    "\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\111\u0356\0\6\1"+
-    "\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1\2\0\27\1"+
-    "\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1\100\0\1\1"+
-    "\3\2\1\0\2\2\5\0\4\2\4\1\1\0\3\1\1\0\33\1"+
-    "\4\0\3\2\4\0\1\2\40\0\35\1\203\0\66\1\12\0\26\1"+
-    "\12\0\23\1\215\0\111\1\u03b7\0\3\2\65\1\17\2\37\0\12\111"+
-    "\20\0\3\2\55\1\13\2\25\0\31\1\7\0\12\111\6\0\3\2"+
-    "\44\1\16\2\1\0\12\111\100\0\3\2\60\1\16\2\4\1\13\0"+
-    "\12\111\u04a6\0\53\1\15\2\10\0\12\111\u0936\0\u036f\1\221\0\143\1"+
-    "\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1\13\0\1\1\56\2\20\0"+
-    "\4\2\15\1\u4060\0\2\1\u2163\0\5\2\3\0\6\2\10\0\10\2"+
-    "\2\0\7\2\36\0\4\2\224\0\3\2\u01bb\0\125\1\1\0\107\1"+
-    "\1\0\2\1\2\0\1\1\2\0\2\1\2\0\4\1\1\0\14\1"+
-    "\1\0\1\1\1\0\7\1\1\0\101\1\1\0\4\1\2\0\10\1"+
-    "\1\0\7\1\1\0\34\1\1\0\4\1\1\0\5\1\1\0\1\1"+
-    "\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0\31\1\1\0\37\1"+
-    "\1\0\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1"+
-    "\1\0\37\1\1\0\31\1\1\0\10\1\2\0\62\111\u1600\0\4\1"+
+    "\12\111\1\0\2\1\16\0\3\2\1\0\10\1\1\0\3\1\1\0"+
+    "\51\1\2\0\1\1\7\2\1\0\3\2\1\0\4\2\1\1\5\0"+
+    "\3\1\1\2\7\0\3\1\2\2\2\0\12\111\12\0\6\1\2\0"+
+    "\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0"+
+    "\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\6\0"+
+    "\12\111\2\0\2\2\15\0\60\1\1\2\2\1\7\2\5\0\7\1"+
+    "\10\2\1\0\12\111\47\0\2\1\1\0\1\1\2\0\2\1\1\0"+
+    "\1\1\2\0\1\1\6\0\4\1\1\0\7\1\1\0\3\1\1\0"+
+    "\1\1\1\0\1\1\2\0\2\1\1\0\4\1\1\2\2\1\6\2"+
+    "\1\0\2\2\1\1\2\0\5\1\1\0\1\1\1\0\6\2\2\0"+
+    "\12\111\2\0\4\1\40\0\1\1\27\0\2\2\6\0\12\111\13\0"+
+    "\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1"+
+    "\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0\1\2"+
+    "\71\0\53\1\24\2\1\1\12\111\6\0\6\1\4\2\4\1\3\2"+
+    "\1\1\3\2\2\1\7\2\3\1\4\2\15\1\14\2\1\1\1\2"+
+    "\12\111\4\2\2\0\46\1\1\0\1\1\5\0\1\1\2\0\53\1"+
+    "\1\0\u014d\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1"+
+    "\2\0\51\1\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1"+
+    "\1\0\1\1\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1"+
+    "\2\0\103\1\2\0\3\2\11\0\11\2\16\0\20\1\20\0\126\1"+
+    "\2\0\6\1\3\0\u026c\1\2\0\21\1\1\41\32\1\5\0\113\1"+
+    "\3\0\13\1\7\0\15\1\1\0\4\1\3\2\13\0\22\1\3\2"+
+    "\13\0\22\1\2\2\14\0\15\1\1\0\3\1\1\0\2\2\14\0"+
+    "\64\1\40\2\3\0\1\1\4\0\1\1\1\2\2\0\12\111\41\0"+
+    "\3\2\2\0\12\111\6\0\130\1\10\0\51\1\1\2\1\1\5\0"+
+    "\106\1\12\0\37\1\1\0\14\2\4\0\14\2\12\0\12\111\36\1"+
+    "\2\0\5\1\13\0\54\1\4\0\32\1\6\0\12\111\1\2\45\0"+
+    "\27\1\5\2\4\0\65\1\12\2\1\0\35\2\2\0\1\2\12\111"+
+    "\6\0\12\111\15\0\1\1\10\0\16\2\102\0\5\2\57\1\21\2"+
+    "\7\1\4\0\12\111\21\0\11\2\14\0\3\2\36\1\15\2\2\1"+
+    "\12\111\54\1\16\2\14\0\44\1\24\2\10\0\12\111\3\0\3\1"+
+    "\12\111\44\1\2\0\11\1\107\0\3\2\1\0\25\2\4\1\1\2"+
+    "\4\1\3\2\2\1\1\0\2\2\6\0\300\1\66\2\5\0\5\2"+
+    "\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0"+
+    "\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0"+
+    "\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0"+
+    "\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41\35\0"+
+    "\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41\21\0"+
+    "\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2\3\0"+
+    "\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1\2\0"+
+    "\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1\2\0"+
+    "\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1\1\0"+
+    "\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1\1\0"+
+    "\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2\27\1"+
+    "\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
+    "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0\1\41"+
+    "\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1\4\0"+
+    "\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0\51\1"+
+    "\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0\u51d6\1"+
+    "\52\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\111\2\1"+
+    "\24\0\57\1\1\2\4\0\12\2\1\0\37\1\2\2\120\1\2\2"+
+    "\45\0\11\1\2\0\147\1\2\0\44\1\1\0\10\1\77\0\13\1"+
+    "\1\2\3\1\1\2\4\1\1\2\27\1\5\2\30\0\64\1\14\0"+
+    "\2\2\62\1\22\2\12\0\12\111\6\0\22\2\6\1\3\0\1\1"+
+    "\1\0\1\1\2\0\12\111\34\1\10\2\2\0\27\1\15\2\14\0"+
+    "\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\111\6\0\5\1"+
+    "\1\2\12\1\12\111\5\1\1\0\51\1\16\2\11\0\3\1\1\2"+
+    "\10\1\2\2\2\0\12\111\6\0\27\1\3\0\1\1\3\2\62\1"+
+    "\1\2\1\1\3\2\2\1\2\2\5\1\2\2\1\1\1\2\1\1"+
+    "\30\0\3\1\2\0\13\1\5\2\2\0\3\1\2\2\12\0\6\1"+
+    "\2\0\6\1\2\0\6\1\11\0\7\1\1\0\7\1\1\0\53\1"+
+    "\1\0\12\1\12\0\163\1\10\2\1\0\2\2\2\0\12\111\6\0"+
+    "\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u016e\1\2\0\152\1\46\0"+
+    "\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1\1\0"+
+    "\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1\41\0"+
+    "\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2\20\0"+
+    "\20\2\3\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1\23\0"+
+    "\12\111\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1\3\0"+
+    "\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0\14\1\1\0"+
+    "\32\1\1\0\23\1\1\0\2\1\1\0\17\1\2\0\16\1\42\0"+
+    "\173\1\105\0\65\1\210\0\1\2\202\0\35\1\3\0\61\1\17\0"+
+    "\1\2\37\0\40\1\20\0\33\1\5\0\46\1\5\2\5\0\36\1"+
+    "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\111"+
+    "\6\0\44\1\4\0\44\1\4\0\50\1\10\0\64\1\234\0\u0137\1"+
+    "\11\0\26\1\12\0\10\1\230\0\6\1\2\0\1\1\1\0\54\1"+
+    "\1\0\2\1\3\0\1\1\2\0\27\1\12\0\27\1\11\0\37\1"+
+    "\101\0\23\1\1\0\2\1\12\0\26\1\12\0\32\1\106\0\70\1"+
+    "\6\0\2\1\100\0\1\1\3\2\1\0\2\2\5\0\4\2\4\1"+
+    "\1\0\3\1\1\0\33\1\4\0\3\2\4\0\1\2\40\0\35\1"+
+    "\3\0\35\1\43\0\10\1\1\0\34\1\2\2\31\0\66\1\12\0"+
+    "\26\1\12\0\23\1\15\0\22\1\156\0\111\1\67\0\63\1\15\0"+
+    "\63\1\u030d\0\3\2\65\1\17\2\37\0\12\111\17\0\4\2\55\1"+
+    "\13\2\25\0\31\1\7\0\12\111\6\0\3\2\44\1\16\2\1\0"+
+    "\12\111\20\0\43\1\1\2\2\0\1\1\11\0\3\2\60\1\16\2"+
+    "\4\1\5\0\3\2\3\0\12\111\1\1\1\0\1\1\43\0\22\1"+
+    "\1\0\31\1\14\2\6\0\1\2\101\0\7\1\1\0\1\1\1\0"+
+    "\4\1\1\0\17\1\1\0\12\1\7\0\57\1\14\2\5\0\12\111"+
+    "\6\0\4\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
+    "\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2"+
+    "\2\0\3\2\2\0\1\1\6\0\1\2\5\0\5\1\2\2\2\0"+
+    "\7\2\3\0\5\2\213\0\65\1\22\2\4\1\5\0\12\111\46\0"+
+    "\60\1\24\2\2\1\1\0\1\1\10\0\12\111\246\0\57\1\7\2"+
+    "\2\0\11\2\27\0\4\1\2\2\42\0\60\1\21\2\3\0\1\1"+
+    "\13\0\12\111\46\0\53\1\15\2\10\0\12\111\66\0\32\1\3\0"+
+    "\17\2\4\0\12\111\u0166\0\100\1\12\111\25\0\1\1\u01c0\0\71\1"+
+    "\u0107\0\11\1\1\0\45\1\10\2\1\0\10\2\1\1\17\0\12\111"+
+    "\30\0\36\1\2\0\26\2\1\0\16\2\u0349\0\u039a\1\146\0\157\1"+
+    "\21\0\304\1\u0abc\0\u042f\1\u0fd1\0\u0247\1\u21b9\0\u0239\1\7\0\37\1"+
+    "\1\0\12\111\146\0\36\1\2\0\5\2\13\0\60\1\7\2\11\0"+
+    "\4\1\14\0\12\111\11\0\25\1\5\0\23\1\u0370\0\105\1\13\0"+
+    "\1\1\56\2\20\0\4\2\15\1\100\0\1\1\37\0\u17ed\1\23\0"+
+    "\u02f3\1\u250d\0\2\1\u0bfe\0\153\1\5\0\15\1\3\0\11\1\7\0"+
+    "\12\1\3\0\2\2\u14c6\0\5\2\3\0\6\2\10\0\10\2\2\0"+
+    "\7\2\36\0\4\2\224\0\3\2\u01bb\0\125\1\1\0\107\1\1\0"+
+    "\2\1\2\0\1\1\2\0\2\1\2\0\4\1\1\0\14\1\1\0"+
+    "\1\1\1\0\7\1\1\0\101\1\1\0\4\1\2\0\10\1\1\0"+
+    "\7\1\1\0\34\1\1\0\4\1\1\0\5\1\1\0\1\1\3\0"+
+    "\7\1\1\0\u0154\1\2\0\31\1\1\0\31\1\1\0\37\1\1\0"+
+    "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
+    "\37\1\1\0\31\1\1\0\10\1\2\0\62\111\u0200\0\67\2\4\0"+
+    "\62\2\10\0\1\2\16\0\1\2\26\0\5\2\1\0\17\2\u0550\0"+
+    "\7\2\1\0\21\2\2\0\7\2\1\0\2\2\1\0\5\2\u07d5\0"+
+    "\305\1\13\0\7\2\51\0\104\1\7\2\5\0\12\111\u04a6\0\4\1"+
     "\1\0\33\1\1\0\2\1\1\0\1\1\2\0\1\1\1\0\12\1"+
     "\1\0\4\1\1\0\1\1\1\0\1\1\6\0\1\1\4\0\1\1"+
     "\1\0\1\1\1\0\1\1\1\0\3\1\1\0\2\1\1\0\1\1"+
@@ -223,7 +248,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     "\1\0\2\1\1\0\1\1\2\0\4\1\1\0\7\1\1\0\4\1"+
     "\1\0\4\1\1\0\1\1\1\0\12\1\1\0\21\1\5\0\3\1"+
     "\1\0\5\1\1\0\21\1\u1144\0\ua6d7\1\51\0\u1035\1\13\0\336\1"+
-    "\u3fe2\0\u021e\1\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u06ed\0"+
+    "\2\0\u1682\1\u295e\0\u021e\1\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u06ed\0"+
     "\360\2\uffff\0\uffff\0\ufe12\0";
 
   /** 
@@ -29654,7 +29679,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -29809,11 +29834,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -29950,24 +29975,14 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   private TextSegment entitySegment = new TextSegment(2);
 
   /**
-   * Creates a new HTMLStripCharFilter over the provided Reader.
-   * @param source Reader to strip html tags from.
-   */
-  public HTMLStripCharFilter(Reader source) {
-    super(source);
-    this.zzReader = source;
-  }
-
-  /**
    * Creates a new HTMLStripCharFilter over the provided Reader
    * with the specified start and end tags.
-   * @param source Reader to strip html tags from.
+   * @param in Reader to strip html tags from.
    * @param escapedTags Tags in this set (both start and end tags)
    *  will not be filtered out.
    */
-  public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
-    super(source);
-    this.zzReader = source;
+  public HTMLStripCharFilter(Reader in, Set<String> escapedTags) {
+    this(in);
     if (null != escapedTags) {
       for (String tag : escapedTags) {
         if (tag.equalsIgnoreCase("BR")) {
@@ -30059,7 +30074,15 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   }
 
 
-  
+  /**
+   * Creates a new scanner
+   *
+   * @param   in  the java.io.Reader to read input from.
+   */
+  public HTMLStripCharFilter(java.io.Reader in) {
+    super(in);
+    this.zzReader = in;
+  }
 
 
   /** 
@@ -30072,7 +30095,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     char [] map = new char[0x110000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2836) {
+    while (i < 3340) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -30116,28 +30139,29 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested to few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -30420,43 +30444,55 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { if (yylength() == 1) {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+            zzDoEOF();
+          {   return eofReturnValue;
+ }
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { if (yylength() == 1) {
     return zzBuffer[zzStartRead];
   } else {
     outputSegment.append(yytext()); return outputSegment.nextChar();
   }
-          }
-        case 55: break;
-        case 2: 
-          { inputStart = yychar;
+            } 
+            // fall through
+          case 55: break;
+          case 2: 
+            { inputStart = yychar;
   inputSegment.clear();
   inputSegment.append('<');
   yybegin(LEFT_ANGLE_BRACKET);
-          }
-        case 56: break;
-        case 3: 
-          { inputStart = yychar;
+            } 
+            // fall through
+          case 56: break;
+          case 3: 
+            { inputStart = yychar;
   inputSegment.clear();
   inputSegment.append('&');
   yybegin(AMPERSAND);
-          }
-        case 57: break;
-        case 4: 
-          { yypushback(yylength());
+            } 
+            // fall through
+          case 57: break;
+          case 4: 
+            { yypushback(yylength());
     outputSegment = inputSegment;
     outputSegment.restart();
     yybegin(YYINITIAL);
     return outputSegment.nextChar();
-          }
-        case 58: break;
-        case 5: 
-          { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
-          }
-        case 59: break;
-        case 6: 
-          { int matchLength = yylength();
+            } 
+            // fall through
+          case 58: break;
+          case 5: 
+            { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
+            } 
+            // fall through
+          case 59: break;
+          case 6: 
+            { int matchLength = yylength();
     inputSegment.write(zzBuffer, zzStartRead, matchLength);
     if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
       String decimalCharRef = yytext();
@@ -30487,180 +30523,206 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       yybegin(YYINITIAL);
       return outputSegment.nextChar();
     }
-          }
-        case 60: break;
-        case 7: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 60: break;
+          case 7: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
     yybegin(YYINITIAL);
     return outputSegment.nextChar();
-          }
-        case 61: break;
-        case 8: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 61: break;
+          case 8: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(START_TAG_TAIL_INCLUDE);
     } else {
       yybegin(START_TAG_TAIL_SUBSTITUTE);
     }
-          }
-        case 62: break;
-        case 9: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 62: break;
+          case 9: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(START_TAG_TAIL_INCLUDE);
     } else {
       yybegin(START_TAG_TAIL_EXCLUDE);
     }
-          }
-        case 63: break;
-        case 10: 
-          { inputSegment.append('!'); yybegin(BANG);
-          }
-        case 64: break;
-        case 11: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 63: break;
+          case 10: 
+            { inputSegment.append('!'); yybegin(BANG);
+            } 
+            // fall through
+          case 64: break;
+          case 11: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     yybegin(LEFT_ANGLE_BRACKET_SPACE);
-          }
-        case 65: break;
-        case 12: 
-          { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
-          }
-        case 66: break;
-        case 13: 
-          { inputSegment.append(yytext());
-          }
-        case 67: break;
-        case 14: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 65: break;
+          case 12: 
+            { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+            } 
+            // fall through
+          case 66: break;
+          case 13: 
+            { inputSegment.append(yytext());
+            } 
+            // fall through
+          case 67: break;
+          case 14: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += inputSegment.length() + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
-          }
-        case 68: break;
-        case 15: 
-          { 
-          }
-        case 69: break;
-        case 16: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 70: break;
-        case 17: 
-          { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 71: break;
-        case 18: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 68: break;
+          case 15: 
+            { 
+            } 
+            // fall through
+          case 69: break;
+          case 16: 
+            { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 70: break;
+          case 17: 
+            { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 71: break;
+          case 18: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(END_TAG_TAIL_INCLUDE);
     } else {
       yybegin(END_TAG_TAIL_SUBSTITUTE);
     }
-          }
-        case 72: break;
-        case 19: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 72: break;
+          case 19: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
       yybegin(END_TAG_TAIL_INCLUDE);
     } else {
       yybegin(END_TAG_TAIL_EXCLUDE);
     }
-          }
-        case 73: break;
-        case 20: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
-          }
-        case 74: break;
-        case 21: 
-          { if (yylength() == 1) {
+            } 
+            // fall through
+          case 73: break;
+          case 20: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 74: break;
+          case 21: 
+            { if (yylength() == 1) {
       return zzBuffer[zzStartRead];
     } else {
       outputSegment.append(yytext()); return outputSegment.nextChar();
     }
-          }
-        case 75: break;
-        case 22: 
-          { previousRestoreState = restoreState;
+            } 
+            // fall through
+          case 75: break;
+          case 22: 
+            { previousRestoreState = restoreState;
     restoreState = SERVER_SIDE_INCLUDE;
     yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 76: break;
-        case 23: 
-          { previousRestoreState = restoreState;
+            } 
+            // fall through
+          case 76: break;
+          case 23: 
+            { previousRestoreState = restoreState;
     restoreState = SERVER_SIDE_INCLUDE;
     yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 77: break;
-        case 24: 
-          { yybegin(restoreState); restoreState = previousRestoreState;
-          }
-        case 78: break;
-        case 25: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+            } 
+            // fall through
+          case 77: break;
+          case 24: 
+            { yybegin(restoreState); restoreState = previousRestoreState;
+            } 
+            // fall through
+          case 78: break;
+          case 25: 
+            { inputSegment.write(zzBuffer, zzStartRead, yylength());
      outputSegment = inputSegment;
      yybegin(YYINITIAL);
      return outputSegment.nextChar();
-          }
-        case 79: break;
-        case 26: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 79: break;
+          case 26: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - 1;
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
     return BLOCK_LEVEL_END_TAG_REPLACEMENT;
-          }
-        case 80: break;
-        case 27: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 80: break;
+          case 27: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += inputSegment.length() + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     outputSegment = inputSegment;
     yybegin(YYINITIAL);
-          }
-        case 81: break;
-        case 28: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
+            } 
+            // fall through
+          case 81: break;
+          case 28: 
+            { // add (previously matched input length) + (this match length) - (substitution length)
     cumulativeDiff += inputSegment.length() + yylength() - 1;
     // position the correction at (already output length) + (substitution length)
     addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
     return BLOCK_LEVEL_START_TAG_REPLACEMENT;
-          }
-        case 82: break;
-        case 29: 
-          { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 83: break;
-        case 30: 
-          { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
-          }
-        case 84: break;
-        case 31: 
-          { int length = yylength();
+            } 
+            // fall through
+          case 82: break;
+          case 29: 
+            { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 83: break;
+          case 30: 
+            { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+            } 
+            // fall through
+          case 84: break;
+          case 31: 
+            { int length = yylength();
     inputSegment.write(zzBuffer, zzStartRead, length);
     entitySegment.clear();
     char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
     entitySegment.append(ch);
     outputSegment = entitySegment;
     yybegin(CHARACTER_REFERENCE_TAIL);
-          }
-        case 85: break;
-        case 32: 
-          { int matchLength = yylength();
+            } 
+            // fall through
+          case 85: break;
+          case 32: 
+            { int matchLength = yylength();
     inputSegment.write(zzBuffer, zzStartRead, matchLength);
     if (matchLength <= 6) { // 10FFFF: max 6 hex chars
       String hexCharRef
@@ -30692,18 +30754,20 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       yybegin(YYINITIAL);
       return outputSegment.nextChar();
     }
-          }
-        case 86: break;
-        case 33: 
-          { if (inputSegment.length() > 2) { // Chars between "<!" and "--" - this is not a comment
+            } 
+            // fall through
+          case 86: break;
+          case 33: 
+            { if (inputSegment.length() > 2) { // Chars between "<!" and "--" - this is not a comment
       inputSegment.append(yytext());
     } else {
       yybegin(COMMENT);
     }
-          }
-        case 87: break;
-        case 34: 
-          { yybegin(YYINITIAL);
+            } 
+            // fall through
+          case 87: break;
+          case 34: 
+            { yybegin(YYINITIAL);
     if (escapeBR) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
@@ -30716,23 +30780,26 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.reset();
       return BR_START_TAG_REPLACEMENT;
     }
-          }
-        case 88: break;
-        case 35: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
+            } 
+            // fall through
+          case 88: break;
+          case 35: 
+            { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
     cumulativeDiff += yychar - inputStart + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
     yybegin(YYINITIAL);
-          }
-        case 89: break;
-        case 36: 
-          { yybegin(SCRIPT);
-          }
-        case 90: break;
-        case 37: 
-          { yybegin(YYINITIAL);
+            } 
+            // fall through
+          case 89: break;
+          case 36: 
+            { yybegin(SCRIPT);
+            } 
+            // fall through
+          case 90: break;
+          case 37: 
+            { yybegin(YYINITIAL);
     if (escapeBR) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
@@ -30745,66 +30812,77 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.reset();
       return BR_END_TAG_REPLACEMENT;
     }
-          }
-        case 91: break;
-        case 38: 
-          { // add (this match length) [ - (substitution length) = 0 ]
+            } 
+            // fall through
+          case 91: break;
+          case 38: 
+            { // add (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     yybegin(YYINITIAL);
-          }
-        case 92: break;
-        case 39: 
-          { yybegin(restoreState);
-          }
-        case 93: break;
-        case 40: 
-          { yybegin(STYLE);
-          }
-        case 94: break;
-        case 41: 
-          { yybegin(SCRIPT_COMMENT);
-          }
-        case 95: break;
-        case 42: 
-          { yybegin(STYLE_COMMENT);
-          }
-        case 96: break;
-        case 43: 
-          { restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 97: break;
-        case 44: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 98: break;
-        case 45: 
-          { restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
-          }
-        case 99: break;
-        case 46: 
-          { yybegin(STYLE);
+            } 
+            // fall through
+          case 92: break;
+          case 39: 
+            { yybegin(restoreState);
+            } 
+            // fall through
+          case 93: break;
+          case 40: 
+            { yybegin(STYLE);
+            } 
+            // fall through
+          case 94: break;
+          case 41: 
+            { yybegin(SCRIPT_COMMENT);
+            } 
+            // fall through
+          case 95: break;
+          case 42: 
+            { yybegin(STYLE_COMMENT);
+            } 
+            // fall through
+          case 96: break;
+          case 43: 
+            { restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 97: break;
+          case 44: 
+            { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 98: break;
+          case 45: 
+            { restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+            } 
+            // fall through
+          case 99: break;
+          case 46: 
+            { yybegin(STYLE);
     if (escapeSTYLE) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
       inputStart += 1 + yylength();
       return outputSegment.nextChar();
     }
-          }
-        case 100: break;
-        case 47: 
-          { yybegin(SCRIPT);
+            } 
+            // fall through
+          case 100: break;
+          case 47: 
+            { yybegin(SCRIPT);
     if (escapeSCRIPT) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
       inputStart += 1 + yylength();
       return outputSegment.nextChar();
     }
-          }
-        case 101: break;
-        case 48: 
-          { if (inputSegment.length() > 2) { // Chars between "<!" and "[CDATA[" - this is not a CDATA section
+            } 
+            // fall through
+          case 101: break;
+          case 48: 
+            { if (inputSegment.length() > 2) { // Chars between "<!" and "[CDATA[" - this is not a CDATA section
       inputSegment.append(yytext());
     } else {
       // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
@@ -30814,10 +30892,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
       inputSegment.clear();
       yybegin(CDATA);
     }
-          }
-        case 102: break;
-        case 49: 
-          { inputSegment.clear();
+            } 
+            // fall through
+          case 102: break;
+          case 49: 
+            { inputSegment.clear();
     yybegin(YYINITIAL);
     // add (previously matched input length) -- current match and substitution handled below
     cumulativeDiff += yychar - inputStart;
@@ -30837,10 +30916,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
     addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
     return returnValue;
-          }
-        case 103: break;
-        case 50: 
-          { inputSegment.clear();
+            } 
+            // fall through
+          case 103: break;
+          case 50: 
+            { inputSegment.clear();
     yybegin(YYINITIAL);
     // add (previously matched input length) -- current match and substitution handled below
     cumulativeDiff += yychar - inputStart;
@@ -30860,10 +30940,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     }
     addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
     return returnValue;
-          }
-        case 104: break;
-        case 51: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 104: break;
+          case 51: 
+            { // Handle paired UTF-16 surrogates.
     outputSegment = entitySegment;
     outputSegment.clear();
     String surrogatePair = yytext();
@@ -30888,10 +30969,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     inputSegment.clear();
     yybegin(YYINITIAL);
     return highSurrogate;
-          }
-        case 105: break;
-        case 52: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 105: break;
+          case 52: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     char lowSurrogate = '\u0000';
@@ -30922,10 +31004,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 106: break;
-        case 53: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 106: break;
+          case 53: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     try { // High surrogates are in decimal range [55296, 56319]
@@ -30955,10 +31038,11 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 107: break;
-        case 54: 
-          { // Handle paired UTF-16 surrogates.
+            } 
+            // fall through
+          case 107: break;
+          case 54: 
+            { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
     try { // High surrogates are in decimal range [55296, 56319]
@@ -30991,18 +31075,12 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
-          }
-        case 108: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            zzDoEOF();
-              {   return eofReturnValue;
- }
-          } 
-          else {
+            } 
+            // fall through
+          case 108: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
index d810d79..8b83de0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %apiprivate
 %type int
 %final
@@ -50,6 +50,10 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 %xstate START_TAG_TAIL_INCLUDE, START_TAG_TAIL_EXCLUDE, START_TAG_TAIL_SUBSTITUTE
 %xstate STYLE, STYLE_COMMENT
 
+%init{
+  super(in);
+%init}
+
 // From XML 1.0 <http://www.w3.org/TR/xml/>:
 //
 //    [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [...]
@@ -166,24 +170,14 @@ InlineElment = ( [aAbBiIqQsSuU]                   |
   private TextSegment entitySegment = new TextSegment(2);
 
   /**
-   * Creates a new HTMLStripCharFilter over the provided Reader.
-   * @param source Reader to strip html tags from.
-   */
-  public HTMLStripCharFilter(Reader source) {
-    super(source);
-    this.zzReader = source;
-  }
-
-  /**
    * Creates a new HTMLStripCharFilter over the provided Reader
    * with the specified start and end tags.
-   * @param source Reader to strip html tags from.
+   * @param in Reader to strip html tags from.
    * @param escapedTags Tags in this set (both start and end tags)
    *  will not be filtered out.
    */
-  public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
-    super(source);
-    this.zzReader = source;
+  public HTMLStripCharFilter(Reader in, Set<String> escapedTags) {
+    this(in);
     if (null != escapedTags) {
       for (String tag : escapedTags) {
         if (tag.equalsIgnoreCase("BR")) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
index 7e5105d..31d3d96 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -251,7 +251,7 @@ class ClassicTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -323,11 +323,11 @@ class ClassicTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -436,28 +436,29 @@ public final void getText(CharTermAttribute t) {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested to few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -681,55 +682,65 @@ public final void getText(CharTermAttribute t) {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
-          }
-        case 11: break;
-        case 2: 
-          { return ALPHANUM;
-          }
-        case 12: break;
-        case 3: 
-          { return CJ;
-          }
-        case 13: break;
-        case 4: 
-          { return HOST;
-          }
-        case 14: break;
-        case 5: 
-          { return NUM;
-          }
-        case 15: break;
-        case 6: 
-          { return APOSTROPHE;
-          }
-        case 16: break;
-        case 7: 
-          { return COMPANY;
-          }
-        case 17: break;
-        case 8: 
-          { return ACRONYM_DEP;
-          }
-        case 18: break;
-        case 9: 
-          { return ACRONYM;
-          }
-        case 19: break;
-        case 10: 
-          { return EMAIL;
-          }
-        case 20: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            return YYEOF;
-          } 
-          else {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+        return YYEOF;
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+            } 
+            // fall through
+          case 11: break;
+          case 2: 
+            { return ALPHANUM;
+            } 
+            // fall through
+          case 12: break;
+          case 3: 
+            { return CJ;
+            } 
+            // fall through
+          case 13: break;
+          case 4: 
+            { return HOST;
+            } 
+            // fall through
+          case 14: break;
+          case 5: 
+            { return NUM;
+            } 
+            // fall through
+          case 15: break;
+          case 6: 
+            { return APOSTROPHE;
+            } 
+            // fall through
+          case 16: break;
+          case 7: 
+            { return COMPANY;
+            } 
+            // fall through
+          case 17: break;
+          case 8: 
+            { return ACRONYM_DEP;
+            } 
+            // fall through
+          case 18: break;
+          case 9: 
+            { return ACRONYM;
+            } 
+            // fall through
+          case 19: break;
+          case 10: 
+            { return EMAIL;
+            } 
+            // fall through
+          case 20: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/283b19a8/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
index 842ae51..65848f2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
@@ -32,33 +32,32 @@ import org.apache.lucene.util.AttributeFactory;
  * algorithm, as specified in 
  * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a> 
  * URLs and email addresses are also tokenized according to the relevant RFCs.
- * <p>
- * Tokens produced are of the following types:
- * <ul>
- *   <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
- *   <li>&lt;NUM&gt;: A number</li>
- *   <li>&lt;URL&gt;: A URL</li>
- *   <li>&lt;EMAIL&gt;: An email address</li>
- *   <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
- *       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- *   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
- *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
- * </ul>
  */
 
 public final class UAX29URLEmailTokenizer extends Tokenizer {
   /** A private instance of the JFlex-constructed scanner */
   private final UAX29URLEmailTokenizerImpl scanner;
-  
-  public static final int ALPHANUM          = 0;
-  public static final int NUM               = 1;
-  public static final int SOUTHEAST_ASIAN   = 2;
-  public static final int IDEOGRAPHIC       = 3;
-  public static final int HIRAGANA          = 4;
-  public static final int KATAKANA          = 5;
-  public static final int HANGUL            = 6;
-  public static final int URL               = 7;
-  public static final int EMAIL             = 8;
+
+  /** Alpha/numeric token type */
+  public static final int ALPHANUM = 0;
+  /** Numeric token type */
+  public static final int NUM = 1;
+  /** Southeast Asian token type */
+  public static final int SOUTHEAST_ASIAN = 2;
+  /** Ideographic token type */
+  public static final int IDEOGRAPHIC = 3;
+  /** Hiragana token type */
+  public static final int HIRAGANA = 4;
+  /** Katakana token type */
+  public static final int KATAKANA = 5;
+  /** Hangul token type */
+  public static final int HANGUL = 6;
+  /** URL token type */
+  public static final int URL = 7;
+  /** Email token type */
+  public static final int EMAIL = 8;
+  /** Emoji token type. */
+  public static final int EMOJI = 9;
 
   /** String token types that correspond to token type int constants */
   public static final String [] TOKEN_TYPES = new String [] {
@@ -71,6 +70,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
     StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL],
     "<URL>",
     "<EMAIL>",
+    StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI]
   };
 
   /** Absolute maximum sized token */


[15/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
index a797082..3d964d9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -39,6 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
@@ -69,156 +70,221 @@ public final class UAX29URLEmailTokenizerImpl {
    * Translates characters to character classes
    */
   private static final String ZZ_CMAP_PACKED = 
-    "\1\112\10\110\2\112\2\110\1\112\23\110\1\113\1\17\1\103\1\113"+
-    "\1\75\1\73\1\16\2\76\2\113\1\77\1\57\1\24\1\102\1\65"+
-    "\1\60\1\63\1\62\1\61\1\71\1\70\1\66\1\67\1\64\1\72"+
-    "\1\106\1\110\1\107\1\110\1\101\1\100\1\25\1\30\1\37\1\42"+
-    "\1\36\1\50\1\41\1\32\1\35\1\56\1\51\1\40\1\44\1\46"+
-    "\1\33\1\27\1\54\1\26\1\47\1\31\1\43\1\34\1\55\1\53"+
-    "\1\45\1\52\1\104\1\111\1\105\1\114\1\74\1\114\1\25\1\30"+
-    "\1\37\1\42\1\36\1\50\1\41\1\32\1\35\1\56\1\51\1\40"+
-    "\1\44\1\46\1\33\1\27\1\54\1\26\1\47\1\31\1\43\1\34"+
-    "\1\55\1\53\1\45\1\52\3\114\1\73\1\115\52\0\1\14\2\0"+
-    "\1\3\7\0\1\14\1\0\1\7\2\0\1\14\5\0\27\14\1\0"+
-    "\37\14\1\0\u01ca\14\4\0\14\14\5\0\1\7\10\0\5\14\7\0"+
-    "\1\14\1\0\1\14\21\0\160\117\5\14\1\0\2\14\2\0\4\14"+
-    "\1\10\7\0\1\14\1\7\3\14\1\0\1\14\1\0\24\14\1\0"+
-    "\123\14\1\0\213\14\1\0\7\117\236\14\11\0\46\14\2\0\1\14"+
-    "\7\0\47\14\1\0\1\10\7\0\55\117\1\0\1\117\1\0\2\117"+
-    "\1\0\2\117\1\0\1\117\10\0\33\20\5\0\3\20\1\1\1\7"+
-    "\13\0\5\3\7\0\2\10\2\0\13\117\1\0\1\3\3\0\53\14"+
-    "\25\117\12\4\1\0\1\5\1\10\1\0\2\14\1\117\143\14\1\0"+
-    "\1\14\7\117\1\3\1\0\6\117\2\14\2\117\1\0\4\117\2\14"+
-    "\12\4\3\14\2\0\1\14\17\0\1\3\1\14\1\117\36\14\33\117"+
-    "\2\0\131\14\13\117\1\14\16\0\12\4\41\14\11\117\2\14\2\0"+
-    "\1\10\1\0\1\14\5\0\26\14\4\117\1\14\11\117\1\14\3\117"+
-    "\1\14\5\117\22\0\31\14\3\117\104\0\1\14\1\0\13\14\67\0"+
-    "\33\117\1\0\4\117\66\14\3\117\1\14\22\117\1\14\7\117\12\14"+
-    "\2\117\2\0\12\4\1\0\7\14\1\0\7\14\1\0\3\117\1\0"+
-    "\10\14\2\0\2\14\2\0\26\14\1\0\7\14\1\0\1\14\3\0"+
-    "\4\14\2\0\1\117\1\14\7\117\2\0\2\117\2\0\3\117\1\14"+
-    "\10\0\1\117\4\0\2\14\1\0\3\14\2\117\2\0\12\4\2\14"+
-    "\17\0\3\117\1\0\6\14\4\0\2\14\2\0\26\14\1\0\7\14"+
-    "\1\0\2\14\1\0\2\14\1\0\2\14\2\0\1\117\1\0\5\117"+
-    "\4\0\2\117\2\0\3\117\3\0\1\117\7\0\4\14\1\0\1\14"+
-    "\7\0\12\4\2\117\3\14\1\117\13\0\3\117\1\0\11\14\1\0"+
-    "\3\14\1\0\26\14\1\0\7\14\1\0\2\14\1\0\5\14\2\0"+
-    "\1\117\1\14\10\117\1\0\3\117\1\0\3\117\2\0\1\14\17\0"+
-    "\2\14\2\117\2\0\12\4\21\0\3\117\1\0\10\14\2\0\2\14"+
-    "\2\0\26\14\1\0\7\14\1\0\2\14\1\0\5\14\2\0\1\117"+
-    "\1\14\7\117\2\0\2\117\2\0\3\117\10\0\2\117\4\0\2\14"+
-    "\1\0\3\14\2\117\2\0\12\4\1\0\1\14\20\0\1\117\1\14"+
-    "\1\0\6\14\3\0\3\14\1\0\4\14\3\0\2\14\1\0\1\14"+
-    "\1\0\2\14\3\0\2\14\3\0\3\14\3\0\14\14\4\0\5\117"+
-    "\3\0\3\117\1\0\4\117\2\0\1\14\6\0\1\117\16\0\12\4"+
-    "\21\0\3\117\1\0\10\14\1\0\3\14\1\0\27\14\1\0\12\14"+
-    "\1\0\5\14\3\0\1\14\7\117\1\0\3\117\1\0\4\117\7\0"+
-    "\2\117\1\0\2\14\6\0\2\14\2\117\2\0\12\4\22\0\2\117"+
-    "\1\0\10\14\1\0\3\14\1\0\27\14\1\0\12\14\1\0\5\14"+
-    "\2\0\1\117\1\14\7\117\1\0\3\117\1\0\4\117\7\0\2\117"+
-    "\7\0\1\14\1\0\2\14\2\117\2\0\12\4\1\0\2\14\17\0"+
-    "\2\117\1\0\10\14\1\0\3\14\1\0\51\14\2\0\1\14\7\117"+
-    "\1\0\3\117\1\0\4\117\1\14\10\0\1\117\10\0\2\14\2\117"+
-    "\2\0\12\4\12\0\6\14\2\0\2\117\1\0\22\14\3\0\30\14"+
-    "\1\0\11\14\1\0\1\14\2\0\7\14\3\0\1\117\4\0\6\117"+
-    "\1\0\1\117\1\0\10\117\22\0\2\117\15\0\60\123\1\23\2\123"+
-    "\7\23\5\0\7\123\10\23\1\0\12\4\47\0\2\123\1\0\1\123"+
-    "\2\0\2\123\1\0\1\123\2\0\1\123\6\0\4\123\1\0\7\123"+
-    "\1\0\3\123\1\0\1\123\1\0\1\123\2\0\2\123\1\0\4\123"+
-    "\1\23\2\123\6\23\1\0\2\23\1\123\2\0\5\123\1\0\1\123"+
-    "\1\0\6\23\2\0\12\4\2\0\4\123\40\0\1\14\27\0\2\117"+
-    "\6\0\12\4\13\0\1\117\1\0\1\117\1\0\1\117\4\0\2\117"+
-    "\10\14\1\0\44\14\4\0\24\117\1\0\2\117\5\14\13\117\1\0"+
-    "\44\117\11\0\1\117\71\0\53\123\24\23\1\123\12\4\6\0\6\123"+
-    "\4\23\4\123\3\23\1\123\3\23\2\123\7\23\3\123\4\23\15\123"+
-    "\14\23\1\123\1\23\12\4\4\23\2\22\46\14\1\0\1\14\5\0"+
-    "\1\14\2\0\53\14\1\0\4\14\u0100\2\111\14\1\0\4\14\2\0"+
-    "\7\14\1\0\1\14\1\0\4\14\2\0\51\14\1\0\4\14\2\0"+
-    "\41\14\1\0\4\14\2\0\7\14\1\0\1\14\1\0\4\14\2\0"+
-    "\17\14\1\0\71\14\1\0\4\14\2\0\103\14\2\0\3\117\40\0"+
-    "\20\14\20\0\125\14\14\0\u026c\14\2\0\21\14\1\0\32\14\5\0"+
-    "\113\14\3\0\3\14\17\0\15\14\1\0\4\14\3\117\13\0\22\14"+
-    "\3\117\13\0\22\14\2\117\14\0\15\14\1\0\3\14\1\0\2\117"+
-    "\14\0\64\123\40\23\3\0\1\123\4\0\1\123\1\23\2\0\12\4"+
-    "\41\0\3\117\1\3\1\0\12\4\6\0\130\14\10\0\51\14\1\117"+
-    "\1\14\5\0\106\14\12\0\35\14\3\0\14\117\4\0\14\117\12\0"+
-    "\12\4\36\123\2\0\5\123\13\0\54\123\4\0\21\23\7\123\2\23"+
-    "\6\0\12\4\1\22\3\0\2\22\40\0\27\14\5\117\4\0\65\123"+
-    "\12\23\1\0\35\23\2\0\1\117\12\4\6\0\12\4\6\0\7\22"+
-    "\1\123\6\22\122\0\5\117\57\14\21\117\7\14\4\0\12\4\21\0"+
-    "\11\117\14\0\3\117\36\14\15\117\2\14\12\4\54\14\16\117\14\0"+
-    "\44\14\24\117\10\0\12\4\3\0\3\14\12\4\44\14\122\0\3\117"+
-    "\1\0\25\117\4\14\1\117\4\14\3\117\2\14\11\0\300\14\47\117"+
-    "\25\0\4\117\u0116\14\2\0\6\14\2\0\46\14\2\0\6\14\2\0"+
-    "\10\14\1\0\1\14\1\0\1\14\1\0\1\14\1\0\37\14\2\0"+
-    "\65\14\1\0\7\14\1\0\1\14\3\0\3\14\1\0\7\14\3\0"+
-    "\4\14\2\0\6\14\4\0\15\14\5\0\3\14\1\0\7\14\17\0"+
-    "\4\3\10\0\2\11\12\0\1\11\2\0\1\7\2\0\5\3\20\0"+
-    "\2\12\3\0\1\10\17\0\1\12\13\0\5\3\1\0\12\3\1\0"+
-    "\1\14\15\0\1\14\20\0\15\14\63\0\41\117\21\0\1\14\4\0"+
-    "\1\14\2\0\12\14\1\0\1\14\3\0\5\14\6\0\1\14\1\0"+
-    "\1\14\1\0\1\14\1\0\4\14\1\0\13\14\2\0\4\14\5\0"+
-    "\5\14\4\0\1\14\21\0\51\14\u032d\0\64\14\u0716\0\57\14\1\0"+
-    "\57\14\1\0\205\14\6\0\4\14\3\117\2\14\14\0\46\14\1\0"+
-    "\1\14\5\0\1\14\2\0\70\14\7\0\1\14\17\0\1\117\27\14"+
-    "\11\0\7\14\1\0\7\14\1\0\7\14\1\0\7\14\1\0\7\14"+
-    "\1\0\7\14\1\0\7\14\1\0\7\14\1\0\40\117\57\0\1\14"+
-    "\120\0\32\13\1\0\131\13\14\0\326\13\57\0\1\14\1\116\1\121"+
-    "\31\0\11\121\6\117\1\0\5\120\2\0\3\121\1\14\1\14\4\0"+
-    "\126\122\2\0\2\117\2\6\3\122\1\6\132\120\1\0\4\120\5\0"+
-    "\51\14\3\0\136\2\21\0\33\14\65\0\20\120\320\0\57\6\1\0"+
-    "\130\6\250\0\u19b6\121\112\0\u51cd\121\63\0\u048d\14\103\0\56\14\2\0"+
-    "\u010d\14\3\0\20\14\12\4\2\14\24\0\57\14\4\117\1\0\12\117"+
-    "\1\0\31\14\7\0\1\117\120\14\2\117\45\0\11\14\2\0\147\14"+
-    "\2\0\4\14\1\0\4\14\14\0\13\14\115\0\12\14\1\117\3\14"+
-    "\1\117\4\14\1\117\27\14\5\117\30\0\64\14\14\0\2\117\62\14"+
-    "\21\117\13\0\12\4\6\0\22\117\6\14\3\0\1\14\4\0\12\4"+
-    "\34\14\10\117\2\0\27\14\15\117\14\0\35\2\3\0\4\117\57\14"+
-    "\16\117\16\0\1\14\12\4\46\0\51\14\16\117\11\0\3\14\1\117"+
-    "\10\14\2\117\2\0\12\4\6\0\27\123\3\22\1\123\1\23\4\0"+
-    "\60\123\1\23\1\123\3\23\2\123\2\23\5\123\2\23\1\123\1\23"+
-    "\1\123\30\0\3\123\2\22\13\14\5\117\2\0\3\14\2\117\12\0"+
-    "\6\14\2\0\6\14\2\0\6\14\11\0\7\14\1\0\7\14\221\0"+
-    "\43\14\10\117\1\0\2\117\2\0\12\4\6\0\u2ba4\2\14\0\27\2"+
-    "\4\0\61\2\u2104\0\u016e\121\2\0\152\121\46\0\7\14\14\0\5\14"+
-    "\5\0\1\20\1\117\12\20\1\0\15\20\1\0\5\20\1\0\1\20"+
-    "\1\0\2\20\1\0\2\20\1\0\12\20\142\14\41\0\u016b\14\22\0"+
-    "\100\14\2\0\66\14\50\0\14\14\4\0\20\117\1\10\2\0\1\7"+
-    "\1\10\13\0\7\117\14\0\2\12\30\0\3\12\1\10\1\0\1\11"+
-    "\1\0\1\10\1\7\32\0\5\14\1\0\207\14\2\0\1\3\7\0"+
-    "\1\11\4\0\1\10\1\0\1\11\1\0\12\4\1\7\1\10\5\0"+
-    "\32\14\4\0\1\12\1\0\32\14\13\0\70\120\2\117\37\2\3\0"+
-    "\6\2\2\0\6\2\2\0\6\2\2\0\3\2\34\0\3\3\4\0"+
-    "\14\14\1\0\32\14\1\0\23\14\1\0\2\14\1\0\17\14\2\0"+
-    "\16\14\42\0\173\14\105\0\65\14\210\0\1\117\202\0\35\14\3\0"+
-    "\61\14\57\0\37\14\21\0\33\14\65\0\36\14\2\0\44\14\4\0"+
-    "\10\14\1\0\5\14\52\0\236\14\2\0\12\4\u0356\0\6\14\2\0"+
-    "\1\14\1\0\54\14\1\0\2\14\3\0\1\14\2\0\27\14\252\0"+
-    "\26\14\12\0\32\14\106\0\70\14\6\0\2\14\100\0\1\14\3\117"+
-    "\1\0\2\117\5\0\4\117\4\14\1\0\3\14\1\0\33\14\4\0"+
-    "\3\117\4\0\1\117\40\0\35\14\203\0\66\14\12\0\26\14\12\0"+
-    "\23\14\215\0\111\14\u03b7\0\3\117\65\14\17\117\37\0\12\4\20\0"+
-    "\3\117\55\14\13\117\2\0\1\3\22\0\31\14\7\0\12\4\6\0"+
-    "\3\117\44\14\16\117\1\0\12\4\100\0\3\117\60\14\16\117\4\14"+
-    "\13\0\12\4\u04a6\0\53\14\15\117\10\0\12\4\u0936\0\u036f\14\221\0"+
-    "\143\14\u0b9d\0\u042f\14\u33d1\0\u0239\14\u04c7\0\105\14\13\0\1\14\56\117"+
-    "\20\0\4\117\15\14\u4060\0\1\120\1\122\u2163\0\5\117\3\0\6\117"+
-    "\10\3\10\117\2\0\7\117\36\0\4\117\224\0\3\117\u01bb\0\125\14"+
-    "\1\0\107\14\1\0\2\14\2\0\1\14\2\0\2\14\2\0\4\14"+
-    "\1\0\14\14\1\0\1\14\1\0\7\14\1\0\101\14\1\0\4\14"+
-    "\2\0\10\14\1\0\7\14\1\0\34\14\1\0\4\14\1\0\5\14"+
-    "\1\0\1\14\3\0\7\14\1\0\u0154\14\2\0\31\14\1\0\31\14"+
-    "\1\0\37\14\1\0\31\14\1\0\37\14\1\0\31\14\1\0\37\14"+
-    "\1\0\31\14\1\0\37\14\1\0\31\14\1\0\10\14\2\0\62\4"+
-    "\u1600\0\4\14\1\0\33\14\1\0\2\14\1\0\1\14\2\0\1\14"+
-    "\1\0\12\14\1\0\4\14\1\0\1\14\1\0\1\14\6\0\1\14"+
-    "\4\0\1\14\1\0\1\14\1\0\1\14\1\0\3\14\1\0\2\14"+
-    "\1\0\1\14\2\0\1\14\1\0\1\14\1\0\1\14\1\0\1\14"+
-    "\1\0\1\14\1\0\2\14\1\0\1\14\2\0\4\14\1\0\7\14"+
-    "\1\0\4\14\1\0\4\14\1\0\1\14\1\0\12\14\1\0\21\14"+
-    "\5\0\3\14\1\0\5\14\1\0\21\14\u032a\0\32\21\1\15\u0dff\0"+
-    "\ua6d7\121\51\0\u1035\121\13\0\336\121\u3fe2\0\u021e\121\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
-    "\1\3\36\0\140\3\200\0\360\117\uffff\0\uffff\0\ufe12\0";
+    "\1\125\10\123\2\125\2\123\1\125\23\123\1\126\1\34\1\7\1\126"+
+    "\1\110\1\106\1\33\2\111\1\112\1\126\1\113\1\73\1\40\1\116"+
+    "\1\101\1\74\1\77\1\76\1\75\1\22\1\104\1\102\1\103\1\100"+
+    "\1\105\1\121\1\123\1\122\1\123\1\115\1\114\1\41\1\44\1\53"+
+    "\1\56\1\52\1\64\1\55\1\46\1\51\1\72\1\65\1\54\1\60"+
+    "\1\62\1\47\1\43\1\70\1\42\1\63\1\45\1\57\1\50\1\71"+
+    "\1\67\1\61\1\66\1\117\1\124\1\120\1\127\1\107\1\127\1\41"+
+    "\1\44\1\53\1\56\1\52\1\64\1\55\1\46\1\51\1\72\1\65"+
+    "\1\54\1\60\1\62\1\47\1\43\1\70\1\42\1\63\1\45\1\57"+
+    "\1\50\1\71\1\67\1\61\1\66\3\127\1\106\1\130\51\0\1\5"+
+    "\1\31\2\0\1\1\1\5\6\0\1\31\1\0\1\24\2\0\1\31"+
+    "\5\0\27\31\1\0\37\31\1\0\u01ca\31\4\0\14\31\5\0\1\24"+
+    "\10\0\5\31\7\0\1\31\1\0\1\31\21\0\160\132\5\31\1\0"+
+    "\2\31\2\0\4\31\1\25\1\31\6\0\1\31\1\24\3\31\1\0"+
+    "\1\31\1\0\24\31\1\0\123\31\1\0\213\31\1\0\7\132\246\31"+
+    "\1\0\46\31\2\0\1\31\7\0\47\31\1\0\1\25\7\0\55\132"+
+    "\1\0\1\132\1\0\2\132\1\0\2\132\1\0\1\132\10\0\33\35"+
+    "\5\0\3\35\1\15\1\24\13\0\6\1\6\0\2\25\2\0\13\132"+
+    "\1\0\1\1\3\0\53\31\25\132\12\20\1\0\1\21\1\25\1\0"+
+    "\2\31\1\132\143\31\1\0\1\31\7\132\1\1\1\0\6\132\2\31"+
+    "\2\132\1\0\4\132\2\31\12\20\3\31\2\0\1\31\17\0\1\1"+
+    "\1\31\1\132\36\31\33\132\2\0\131\31\13\132\1\31\16\0\12\20"+
+    "\41\31\11\132\2\31\2\0\1\25\1\0\1\31\5\0\26\31\4\132"+
+    "\1\31\11\132\1\31\3\132\1\31\5\132\22\0\31\31\3\132\104\0"+
+    "\25\31\1\0\10\31\26\0\16\132\1\1\41\132\66\31\3\132\1\31"+
+    "\22\132\1\31\7\132\12\31\2\132\2\0\12\20\1\0\20\31\3\132"+
+    "\1\0\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\1\31"+
+    "\3\0\4\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132"+
+    "\1\31\10\0\1\132\4\0\2\31\1\0\3\31\2\132\2\0\12\20"+
+    "\2\31\17\0\3\132\1\0\6\31\4\0\2\31\2\0\26\31\1\0"+
+    "\7\31\1\0\2\31\1\0\2\31\1\0\2\31\2\0\1\132\1\0"+
+    "\5\132\4\0\2\132\2\0\3\132\3\0\1\132\7\0\4\31\1\0"+
+    "\1\31\7\0\12\20\2\132\3\31\1\132\13\0\3\132\1\0\11\31"+
+    "\1\0\3\31\1\0\26\31\1\0\7\31\1\0\2\31\1\0\5\31"+
+    "\2\0\1\132\1\31\10\132\1\0\3\132\1\0\3\132\2\0\1\31"+
+    "\17\0\2\31\2\132\2\0\12\20\11\0\1\31\7\0\3\132\1\0"+
+    "\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\2\31\1\0"+
+    "\5\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132\10\0"+
+    "\2\132\4\0\2\31\1\0\3\31\2\132\2\0\12\20\1\0\1\31"+
+    "\20\0\1\132\1\31\1\0\6\31\3\0\3\31\1\0\4\31\3\0"+
+    "\2\31\1\0\1\31\1\0\2\31\3\0\2\31\3\0\3\31\3\0"+
+    "\14\31\4\0\5\132\3\0\3\132\1\0\4\132\2\0\1\31\6\0"+
+    "\1\132\16\0\12\20\20\0\4\132\1\0\10\31\1\0\3\31\1\0"+
+    "\27\31\1\0\20\31\3\0\1\31\7\132\1\0\3\132\1\0\4\132"+
+    "\7\0\2\132\1\0\3\31\5\0\2\31\2\132\2\0\12\20\20\0"+
+    "\1\31\3\132\1\0\10\31\1\0\3\31\1\0\27\31\1\0\12\31"+
+    "\1\0\5\31\2\0\1\132\1\31\7\132\1\0\3\132\1\0\4\132"+
+    "\7\0\2\132\7\0\1\31\1\0\2\31\2\132\2\0\12\20\1\0"+
+    "\2\31\16\0\3\132\1\0\10\31\1\0\3\31\1\0\51\31\2\0"+
+    "\1\31\7\132\1\0\3\132\1\0\4\132\1\31\5\0\3\31\1\132"+
+    "\7\0\3\31\2\132\2\0\12\20\12\0\6\31\2\0\2\132\1\0"+
+    "\22\31\3\0\30\31\1\0\11\31\1\0\1\31\2\0\7\31\3\0"+
+    "\1\132\4\0\6\132\1\0\1\132\1\0\10\132\6\0\12\20\2\0"+
+    "\2\132\15\0\60\137\1\37\2\137\7\37\5\0\7\137\10\37\1\0"+
+    "\12\20\47\0\2\137\1\0\1\137\2\0\2\137\1\0\1\137\2\0"+
+    "\1\137\6\0\4\137\1\0\7\137\1\0\3\137\1\0\1\137\1\0"+
+    "\1\137\2\0\2\137\1\0\4\137\1\37\2\137\6\37\1\0\2\37"+
+    "\1\137\2\0\5\137\1\0\1\137\1\0\6\37\2\0\12\20\2\0"+
+    "\4\137\40\0\1\31\27\0\2\132\6\0\12\20\13\0\1\132\1\0"+
+    "\1\132\1\0\1\132\4\0\2\132\10\31\1\0\44\31\4\0\24\132"+
+    "\1\0\2\132\5\31\13\132\1\0\44\132\11\0\1\132\71\0\53\137"+
+    "\24\37\1\137\12\20\6\0\6\137\4\37\4\137\3\37\1\137\3\37"+
+    "\2\137\7\37\3\137\4\37\15\137\14\37\1\137\1\37\12\20\4\37"+
+    "\2\36\46\31\1\0\1\31\5\0\1\31\2\0\53\31\1\0\4\31"+
+    "\u0100\17\111\31\1\0\4\31\2\0\7\31\1\0\1\31\1\0\4\31"+
+    "\2\0\51\31\1\0\4\31\2\0\41\31\1\0\4\31\2\0\7\31"+
+    "\1\0\1\31\1\0\4\31\2\0\17\31\1\0\71\31\1\0\4\31"+
+    "\2\0\103\31\2\0\3\132\40\0\20\31\20\0\126\31\2\0\6\31"+
+    "\3\0\u026c\31\2\0\21\31\1\0\32\31\5\0\113\31\3\0\13\31"+
+    "\7\0\15\31\1\0\4\31\3\132\13\0\22\31\3\132\13\0\22\31"+
+    "\2\132\14\0\15\31\1\0\3\31\1\0\2\132\14\0\64\137\40\37"+
+    "\3\0\1\137\4\0\1\137\1\37\2\0\12\20\41\0\3\132\1\1"+
+    "\1\0\12\20\6\0\130\31\10\0\5\31\2\132\42\31\1\132\1\31"+
+    "\5\0\106\31\12\0\37\31\1\0\14\132\4\0\14\132\12\0\12\20"+
+    "\36\137\2\0\5\137\13\0\54\137\4\0\32\137\6\0\12\20\1\36"+
+    "\3\0\2\36\40\0\27\31\5\132\4\0\65\137\12\37\1\0\35\37"+
+    "\2\0\1\132\12\20\6\0\12\20\6\0\7\36\1\137\6\36\2\0"+
+    "\17\132\101\0\5\132\57\31\21\132\7\31\4\0\12\20\21\0\11\132"+
+    "\14\0\3\132\36\31\15\132\2\31\12\20\54\31\16\132\14\0\44\31"+
+    "\24\132\10\0\12\20\3\0\3\31\12\20\44\31\2\0\11\31\107\0"+
+    "\3\132\1\0\25\132\4\31\1\132\4\31\3\132\2\31\1\0\2\132"+
+    "\6\0\300\31\66\132\5\0\5\132\u0116\31\2\0\6\31\2\0\46\31"+
+    "\2\0\6\31\2\0\10\31\1\0\1\31\1\0\1\31\1\0\1\31"+
+    "\1\0\37\31\2\0\65\31\1\0\7\31\1\0\1\31\3\0\3\31"+
+    "\1\0\7\31\3\0\4\31\2\0\6\31\4\0\15\31\5\0\3\31"+
+    "\1\0\7\31\17\0\1\1\1\12\2\1\10\0\2\26\12\0\1\26"+
+    "\2\0\1\24\2\0\5\1\1\27\14\0\1\5\2\0\2\134\3\0"+
+    "\1\25\4\0\1\5\12\0\1\134\13\0\5\1\1\0\12\1\1\0"+
+    "\1\31\15\0\1\31\20\0\15\31\63\0\23\132\1\10\15\132\21\0"+
+    "\1\31\4\0\1\31\2\0\12\31\1\0\1\31\3\0\5\31\4\0"+
+    "\1\5\1\0\1\31\1\0\1\31\1\0\1\31\1\0\4\31\1\0"+
+    "\12\31\1\16\2\0\4\31\5\0\5\31\4\0\1\31\21\0\51\31"+
+    "\13\0\6\5\17\0\2\5\u016f\0\2\5\14\0\1\5\137\0\1\5"+
+    "\106\0\1\5\31\0\13\5\4\0\3\5\273\0\14\31\1\16\47\31"+
+    "\300\0\2\5\12\0\1\5\11\0\1\5\72\0\4\5\1\0\5\5"+
+    "\1\5\1\0\7\5\1\5\2\5\1\5\1\5\1\0\2\5\2\5"+
+    "\1\5\4\5\1\4\2\5\1\5\1\5\2\5\2\5\1\5\3\5"+
+    "\1\5\3\5\2\5\10\5\3\5\5\5\1\5\1\5\1\5\5\5"+
+    "\14\5\13\5\2\5\2\5\1\5\1\5\2\5\1\5\1\5\22\5"+
+    "\1\5\2\5\2\5\6\5\12\0\2\5\6\5\1\5\1\5\1\5"+
+    "\2\5\3\5\2\5\10\5\2\5\4\5\2\5\13\5\2\5\5\5"+
+    "\2\5\2\5\1\5\5\5\2\5\1\5\1\5\1\5\2\5\24\5"+
+    "\2\5\5\5\6\5\1\5\2\5\1\4\1\5\2\5\1\5\4\5"+
+    "\1\5\2\5\1\5\2\0\2\5\4\4\1\5\1\5\2\5\1\5"+
+    "\1\0\1\5\1\0\1\5\6\0\1\5\3\0\1\5\6\0\1\5"+
+    "\12\0\2\5\17\0\1\5\2\0\1\5\4\0\1\5\1\0\1\5"+
+    "\4\0\3\5\1\0\1\5\13\0\2\5\3\5\55\0\3\5\11\0"+
+    "\1\5\16\0\1\5\16\0\1\5\u0174\0\2\5\u01cf\0\3\5\23\0"+
+    "\2\5\63\0\1\5\4\0\1\5\252\0\57\31\1\0\57\31\1\0"+
+    "\205\31\6\0\4\31\3\132\2\31\14\0\46\31\1\0\1\31\5\0"+
+    "\1\31\2\0\70\31\7\0\1\31\17\0\1\132\27\31\11\0\7\31"+
+    "\1\0\7\31\1\0\7\31\1\0\7\31\1\0\7\31\1\0\7\31"+
+    "\1\0\7\31\1\0\7\31\1\0\40\132\57\0\1\31\120\0\32\30"+
+    "\1\0\131\30\14\0\326\30\57\0\1\31\1\131\1\135\31\0\11\135"+
+    "\6\132\1\5\5\133\2\0\3\135\1\31\1\31\1\5\3\0\126\136"+
+    "\2\0\2\132\2\23\3\136\1\23\132\133\1\0\4\133\5\0\51\31"+
+    "\3\0\136\17\21\0\33\31\65\0\20\133\227\0\1\5\1\0\1\5"+
+    "\66\0\57\23\1\0\130\23\250\0\u19b6\135\112\0\u51d6\135\52\0\u048d\31"+
+    "\103\0\56\31\2\0\u010d\31\3\0\20\31\12\20\2\31\24\0\57\31"+
+    "\4\132\1\0\12\132\1\0\37\31\2\132\120\31\2\132\45\0\11\31"+
+    "\2\0\147\31\2\0\44\31\1\0\10\31\77\0\13\31\1\132\3\31"+
+    "\1\132\4\31\1\132\27\31\5\132\30\0\64\31\14\0\2\132\62\31"+
+    "\22\132\12\0\12\20\6\0\22\132\6\31\3\0\1\31\1\0\1\31"+
+    "\2\0\12\20\34\31\10\132\2\0\27\31\15\132\14\0\35\17\3\0"+
+    "\4\132\57\31\16\132\16\0\1\31\12\20\6\0\5\137\1\37\12\137"+
+    "\12\20\5\137\1\0\51\31\16\132\11\0\3\31\1\132\10\31\2\132"+
+    "\2\0\12\20\6\0\27\137\3\36\1\137\3\37\62\137\1\37\1\137"+
+    "\3\37\2\137\2\37\5\137\2\37\1\137\1\37\1\137\30\0\3\137"+
+    "\2\36\13\31\5\132\2\0\3\31\2\132\12\0\6\31\2\0\6\31"+
+    "\2\0\6\31\11\0\7\31\1\0\7\31\1\0\53\31\1\0\12\31"+
+    "\12\0\163\31\10\132\1\0\2\132\2\0\12\20\6\0\u2ba4\17\14\0"+
+    "\27\17\4\0\61\17\u2104\0\u016e\135\2\0\152\135\46\0\7\31\14\0"+
+    "\5\31\5\0\1\35\1\132\12\35\1\0\15\35\1\0\5\35\1\0"+
+    "\1\35\1\0\2\35\1\0\2\35\1\0\12\35\142\31\41\0\u016b\31"+
+    "\22\0\100\31\2\0\66\31\50\0\14\31\4\0\16\132\1\6\1\11"+
+    "\1\25\2\0\1\24\1\25\13\0\20\132\3\0\2\134\30\0\3\134"+
+    "\1\25\1\0\1\26\1\0\1\25\1\24\32\0\5\31\1\0\207\31"+
+    "\2\0\1\1\7\0\1\26\4\0\1\25\1\0\1\26\1\0\12\20"+
+    "\1\24\1\25\5\0\32\31\4\0\1\134\1\0\32\31\13\0\70\133"+
+    "\2\132\37\17\3\0\6\17\2\0\6\17\2\0\6\17\2\0\3\17"+
+    "\34\0\3\1\4\0\14\31\1\0\32\31\1\0\23\31\1\0\2\31"+
+    "\1\0\17\31\2\0\16\31\42\0\173\31\105\0\65\31\210\0\1\132"+
+    "\202\0\35\31\3\0\61\31\17\0\1\132\37\0\40\31\20\0\33\31"+
+    "\5\0\46\31\5\132\5\0\36\31\2\0\44\31\4\0\10\31\1\0"+
+    "\5\31\52\0\236\31\2\0\12\20\6\0\44\31\4\0\44\31\4\0"+
+    "\50\31\10\0\64\31\234\0\u0137\31\11\0\26\31\12\0\10\31\230\0"+
+    "\6\31\2\0\1\31\1\0\54\31\1\0\2\31\3\0\1\31\2\0"+
+    "\27\31\12\0\27\31\11\0\37\31\101\0\23\31\1\0\2\31\12\0"+
+    "\26\31\12\0\32\31\106\0\70\31\6\0\2\31\100\0\1\31\3\132"+
+    "\1\0\2\132\5\0\4\132\4\31\1\0\3\31\1\0\33\31\4\0"+
+    "\3\132\4\0\1\132\40\0\35\31\3\0\35\31\43\0\10\31\1\0"+
+    "\34\31\2\132\31\0\66\31\12\0\26\31\12\0\23\31\15\0\22\31"+
+    "\156\0\111\31\67\0\63\31\15\0\63\31\u030d\0\3\132\65\31\17\132"+
+    "\37\0\12\20\17\0\4\132\55\31\13\132\2\0\1\1\22\0\31\31"+
+    "\7\0\12\20\6\0\3\132\44\31\16\132\1\0\12\20\20\0\43\31"+
+    "\1\132\2\0\1\31\11\0\3\132\60\31\16\132\4\31\5\0\3\132"+
+    "\3\0\12\20\1\31\1\0\1\31\43\0\22\31\1\0\31\31\14\132"+
+    "\6\0\1\132\101\0\7\31\1\0\1\31\1\0\4\31\1\0\17\31"+
+    "\1\0\12\31\7\0\57\31\14\132\5\0\12\20\6\0\4\132\1\0"+
+    "\10\31\2\0\2\31\2\0\26\31\1\0\7\31\1\0\2\31\1\0"+
+    "\5\31\2\0\1\132\1\31\7\132\2\0\2\132\2\0\3\132\2\0"+
+    "\1\31\6\0\1\132\5\0\5\31\2\132\2\0\7\132\3\0\5\132"+
+    "\213\0\65\31\22\132\4\31\5\0\12\20\46\0\60\31\24\132\2\31"+
+    "\1\0\1\31\10\0\12\20\246\0\57\31\7\132\2\0\11\132\27\0"+
+    "\4\31\2\132\42\0\60\31\21\132\3\0\1\31\13\0\12\20\46\0"+
+    "\53\31\15\132\10\0\12\20\66\0\32\137\3\0\17\37\4\0\12\20"+
+    "\2\36\3\0\1\36\u0160\0\100\31\12\20\25\0\1\31\u01c0\0\71\31"+
+    "\u0107\0\11\31\1\0\45\31\10\132\1\0\10\132\1\31\17\0\12\20"+
+    "\30\0\36\31\2\0\26\132\1\0\16\132\u0349\0\u039a\31\146\0\157\31"+
+    "\21\0\304\31\u0abc\0\u042f\31\u0fd1\0\u0247\31\u21b9\0\u0239\31\7\0\37\31"+
+    "\1\0\12\20\146\0\36\31\2\0\5\132\13\0\60\31\7\132\11\0"+
+    "\4\31\14\0\12\20\11\0\25\31\5\0\23\31\u0370\0\105\31\13\0"+
+    "\1\31\56\132\20\0\4\132\15\31\100\0\1\31\37\0\u17ed\131\23\0"+
+    "\u02f3\131\u250d\0\1\133\1\136\u0bfe\0\153\31\5\0\15\31\3\0\11\31"+
+    "\7\0\12\31\3\0\2\132\1\0\4\1\u14c1\0\5\132\3\0\6\132"+
+    "\10\1\10\132\2\0\7\132\36\0\4\132\224\0\3\132\u01bb\0\125\31"+
+    "\1\0\107\31\1\0\2\31\2\0\1\31\2\0\2\31\2\0\4\31"+
+    "\1\0\14\31\1\0\1\31\1\0\7\31\1\0\101\31\1\0\4\31"+
+    "\2\0\10\31\1\0\7\31\1\0\34\31\1\0\4\31\1\0\5\31"+
+    "\1\0\1\31\3\0\7\31\1\0\u0154\31\2\0\31\31\1\0\31\31"+
+    "\1\0\37\31\1\0\31\31\1\0\37\31\1\0\31\31\1\0\37\31"+
+    "\1\0\31\31\1\0\37\31\1\0\31\31\1\0\10\31\2\0\62\20"+
+    "\u0200\0\67\132\4\0\62\132\10\0\1\132\16\0\1\132\26\0\5\132"+
+    "\1\0\17\132\u0550\0\7\132\1\0\21\132\2\0\7\132\1\0\2\132"+
+    "\1\0\5\132\u07d5\0\305\31\13\0\7\132\51\0\104\31\7\132\5\0"+
+    "\12\20\u04a6\0\4\31\1\0\33\31\1\0\2\31\1\0\1\31\2\0"+
+    "\1\31\1\0\12\31\1\0\4\31\1\0\1\31\1\0\1\31\6\0"+
+    "\1\31\4\0\1\31\1\0\1\31\1\0\1\31\1\0\3\31\1\0"+
+    "\2\31\1\0\1\31\2\0\1\31\1\0\1\31\1\0\1\31\1\0"+
+    "\1\31\1\0\1\31\1\0\2\31\1\0\1\31\2\0\4\31\1\0"+
+    "\7\31\1\0\4\31\1\0\4\31\1\0\1\31\1\0\12\31\1\0"+
+    "\21\31\5\0\3\31\1\0\5\31\1\0\21\31\u0144\0\4\5\1\5"+
+    "\312\5\1\5\60\5\15\0\3\5\37\0\1\5\32\31\6\0\32\31"+
+    "\2\0\4\5\2\16\14\31\2\16\12\31\4\0\1\5\2\0\12\5"+
+    "\22\0\71\5\32\2\1\32\2\5\15\5\12\0\1\5\24\0\1\5"+
+    "\2\0\11\5\1\0\4\5\11\0\7\5\2\5\256\5\42\5\2\5"+
+    "\141\5\1\4\16\5\2\5\2\5\1\5\3\5\2\5\44\5\3\4"+
+    "\2\5\1\4\2\5\3\4\44\5\2\5\3\5\1\5\4\5\5\3"+
+    "\102\5\2\4\2\5\13\4\25\5\4\4\4\5\1\4\1\5\11\4"+
+    "\3\5\1\4\4\5\3\4\1\5\3\4\42\5\1\4\123\5\1\5"+
+    "\77\5\10\0\3\5\6\5\1\5\30\5\7\5\2\5\2\5\1\5"+
+    "\2\4\4\5\1\4\14\5\1\5\2\5\4\5\2\5\1\4\4\5"+
+    "\2\4\15\5\2\5\2\5\1\5\10\5\2\5\11\5\1\5\5\5"+
+    "\3\5\14\5\3\5\10\5\3\5\2\5\1\5\1\5\1\5\4\5"+
+    "\1\5\6\5\1\5\3\5\1\5\6\5\113\5\3\4\3\5\5\4"+
+    "\60\0\43\5\1\4\20\5\3\4\11\5\1\4\5\5\5\5\1\5"+
+    "\1\4\6\5\15\5\6\5\3\5\1\5\1\5\2\5\3\5\1\5"+
+    "\2\5\7\5\6\5\164\0\14\5\125\0\53\5\14\0\4\5\70\0"+
+    "\10\5\12\0\6\5\50\0\10\5\36\0\122\5\14\0\4\5\10\5"+
+    "\5\4\1\5\2\4\6\5\1\4\11\5\12\4\1\5\1\0\1\5"+
+    "\2\4\1\5\6\5\1\0\52\5\2\5\4\5\3\5\1\5\1\5"+
+    "\47\5\15\5\5\5\2\4\1\5\2\4\6\5\3\5\15\5\1\5"+
+    "\15\4\42\5\u05fe\5\2\0\ua6d7\135\51\0\u1035\135\13\0\336\135\2\0"+
+    "\u1682\135\u295e\0\u021e\135\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
+    "\1\1\36\0\137\13\1\14\200\0\360\132\uffff\0\uffff\0\ufe12\0";
 
   /** 
    * Translates characters to character classes
@@ -231,27 +297,85 @@ public final class UAX29URLEmailTokenizerImpl {
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\2\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
-    "\1\7\2\1\1\2\1\1\1\10\4\2\3\4\2\1"+
-    "\4\2\3\4\1\1\1\2\1\0\1\2\1\0\1\4"+
-    "\1\0\1\2\6\0\1\2\2\0\1\1\3\0\6\2"+
-    "\2\0\3\4\1\2\1\4\5\0\5\2\1\0\2\4"+
-    "\6\0\32\2\3\0\5\2\32\0\4\4\5\0\32\2"+
-    "\2\0\4\2\32\0\4\4\5\0\1\11\1\0\1\2"+
-    "\1\12\2\2\2\12\1\2\10\12\1\2\2\12\1\2"+
-    "\4\12\1\2\1\12\1\2\2\12\2\2\6\12\2\2"+
-    "\1\12\1\2\1\12\1\2\11\12\3\2\3\12\1\2"+
-    "\3\12\2\2\2\12\1\2\3\12\7\2\1\12\10\2"+
-    "\1\12\4\2\1\12\1\2\2\12\1\2\2\12\2\2"+
-    "\1\12\1\2\1\12\1\2\3\12\3\2\1\12\1\2"+
-    "\1\12\1\2\1\12\1\2\1\12\2\2\1\12\2\2"+
-    "\2\12\1\2\3\12\1\2\5\12\1\2\1\12\1\2"+
-    "\1\12\3\2\4\12\1\2\6\12\4\2\1\12\2\2"+
-    "\1\12\6\2\1\12\1\2\2\12\1\2\5\12\3\2"+
-    "\2\12\2\2\5\12\1\2\14\12\1\2\1\12\2\2"+
-    "\3\12\5\2\2\12\1\2\2\12\2\2\2\12\26\2"+
-    "\3\12\3\2\1\12\1\2\2\0\2\2\1\0\1\2"+
-    "\1\0\1\12\2\0\2\12\1\0\10\12\1\0\2\12"+
+    "\2\0\2\1\3\2\2\1\1\3\1\2\1\4\2\5"+
+    "\1\6\1\1\1\7\1\10\2\1\1\3\1\11\4\3"+
+    "\2\5\2\1\1\5\4\3\2\5\1\1\1\0\5\2"+
+    "\2\0\1\2\4\0\1\3\1\0\1\3\2\2\1\0"+
+    "\2\5\1\2\2\5\3\0\1\5\1\0\1\3\3\0"+
+    "\1\3\1\0\1\3\2\0\5\3\3\5\1\3\3\0"+
+    "\1\5\2\0\1\3\1\0\4\3\2\5\2\0\2\2"+
+    "\2\0\1\2\3\0\2\3\2\2\2\5\32\0\2\5"+
+    "\4\0\37\3\5\0\2\5\32\0\2\5\2\0\36\3"+
+    "\3\0\3\2\1\0\1\2\2\0\1\12\1\0\1\3"+
+    "\3\2\1\0\1\5\3\0\1\13\2\0\2\13\1\0"+
+    "\10\13\1\0\2\13\1\0\4\13\1\0\1\13\1\0"+
+    "\2\13\2\0\6\13\2\0\1\13\1\0\1\13\1\0"+
+    "\11\13\3\0\3\13\1\0\3\13\2\0\2\13\1\0"+
+    "\3\13\7\0\1\13\10\0\1\13\4\0\1\13\1\0"+
+    "\2\13\1\0\2\13\2\0\1\13\1\0\1\13\1\0"+
+    "\3\13\3\0\1\13\1\0\1\13\1\0\1\13\1\0"+
+    "\1\13\2\0\1\13\2\0\2\13\1\0\3\13\1\0"+
+    "\5\13\1\0\1\13\1\0\1\13\3\0\4\13\1\0"+
+    "\6\13\4\0\1\13\2\0\1\13\6\0\1\13\1\0"+
+    "\2\13\1\0\5\13\3\0\2\13\2\0\5\13\1\0"+
+    "\14\13\1\0\1\13\2\0\3\13\5\0\2\13\1\0"+
+    "\2\13\2\0\2\13\26\0\3\13\3\0\1\13\1\0"+
+    "\1\5\1\3\1\13\2\3\2\13\1\3\10\13\1\3"+
+    "\2\13\1\3\4\13\1\3\1\13\1\3\2\13\2\3"+
+    "\6\13\2\3\1\13\1\3\1\13\1\3\11\13\3\3"+
+    "\3\13\1\3\3\13\2\3\2\13\1\3\3\13\7\3"+
+    "\1\13\10\3\1\13\4\3\1\13\1\3\2\13\1\3"+
+    "\2\13\2\3\1\13\1\3\1\13\1\3\3\13\3\3"+
+    "\1\13\1\3\1\13\1\3\1\13\1\3\1\13\2\3"+
+    "\1\13\2\3\2\13\1\3\3\13\1\3\5\13\1\3"+
+    "\1\13\1\3\1\13\3\3\4\13\1\3\6\13\4\3"+
+    "\1\13\2\3\1\13\6\3\1\13\1\3\2\13\1\3"+
+    "\5\13\3\3\2\13\2\3\5\13\1\3\14\13\1\3"+
+    "\1\13\2\3\3\13\5\3\2\13\1\3\2\13\2\3"+
+    "\2\13\26\3\3\13\3\3\1\13\1\3\1\0\2\3"+
+    "\1\0\1\3\7\0\1\13\1\0\1\5\375\0\1\5"+
+    "\376\3\42\0\3\5\1\14\1\15\1\16\1\15\2\14"+
+    "\5\15\1\14\1\15\1\14\2\16\1\0\1\13\1\0"+
+    "\1\13\2\0\2\15\1\0\26\15\3\0\5\15\3\0"+
+    "\4\15\2\0\7\15\2\13\4\15\1\0\6\15\1\13"+
+    "\2\15\1\13\4\15\5\0\1\13\2\0\3\15\1\13"+
+    "\13\15\1\13\12\15\6\0\1\15\3\0\13\15\5\0"+
+    "\1\13\4\0\2\15\3\0\1\13\14\0\1\15\4\0"+
+    "\1\13\4\0\1\15\1\13\2\0\1\13\1\0\2\15"+
+    "\3\0\11\15\1\0\2\15\1\0\1\15\1\0\3\15"+
+    "\1\13\4\15\1\13\2\15\2\0\2\15\1\0\1\15"+
+    "\1\0\1\13\1\15\5\0\2\15\4\0\1\13\1\15"+
+    "\3\13\21\15\1\13\6\15\1\0\7\15\1\13\5\15"+
+    "\2\13\4\0\12\15\4\0\4\15\1\13\6\15\1\13"+
+    "\2\0\12\15\4\0\4\15\5\0\6\15\7\0\5\15"+
+    "\1\13\6\15\1\13\2\15\1\0\1\13\2\0\4\15"+
+    "\1\13\1\15\1\13\3\15\5\0\1\13\1\0\5\15"+
+    "\1\13\3\15\1\13\2\15\1\13\5\15\3\0\3\15"+
+    "\1\13\20\15\1\13\16\15\1\13\6\0\3\15\1\13"+
+    "\2\15\1\13\3\15\11\0\1\15\3\0\3\15\1\0"+
+    "\2\15\1\13\16\0\1\13\3\0\1\13\1\0\5\15"+
+    "\2\0\1\14\2\15\1\14\1\15\3\14\1\15\1\14"+
+    "\5\15\1\14\2\15\1\3\1\13\1\3\1\13\2\3"+
+    "\2\15\1\3\26\15\3\3\5\15\3\3\4\15\2\3"+
+    "\7\15\2\13\4\15\1\3\6\15\1\13\2\15\1\13"+
+    "\4\15\5\3\1\13\2\3\3\15\1\13\13\15\1\13"+
+    "\12\15\6\3\1\15\3\3\13\15\5\3\1\13\4\3"+
+    "\2\15\3\3\1\13\14\3\1\15\4\3\1\13\4\3"+
+    "\1\15\1\13\2\3\1\13\1\3\2\15\3\3\11\15"+
+    "\1\3\2\15\1\3\1\15\1\3\3\15\1\13\4\15"+
+    "\1\13\2\15\2\3\2\15\1\3\1\15\1\3\1\13"+
+    "\1\15\5\3\2\15\4\3\1\13\1\15\3\13\21\15"+
+    "\1\13\6\15\1\3\7\15\1\13\5\15\2\13\4\3"+
+    "\12\15\4\3\4\15\1\13\6\15\1\13\2\3\12\15"+
+    "\4\3\4\15\5\3\6\15\7\3\5\15\1\13\6\15"+
+    "\1\13\2\15\1\3\1\13\2\3\4\15\1\13\1\15"+
+    "\1\13\3\15\5\3\1\13\1\3\5\15\1\13\3\15"+
+    "\1\13\2\15\1\13\5\15\3\3\3\15\1\13\20\15"+
+    "\1\13\16\15\1\13\6\3\3\15\1\13\2\15\1\13"+
+    "\3\15\11\3\1\15\3\3\3\15\1\3\2\15\1\13"+
+    "\15\3\1\13\3\3\1\13\1\3\5\15\3\3\1\0"+
+    "\1\3\17\0\3\16\3\5\1\14\u01d9\0\1\14\u01da\3"+
+    "\16\0\1\12\2\0\2\12\1\0\10\12\1\0\2\12"+
     "\1\0\4\12\1\0\1\12\1\0\2\12\2\0\6\12"+
     "\2\0\1\12\1\0\1\12\1\0\11\12\3\0\3\12"+
     "\1\0\3\12\2\0\2\12\1\0\3\12\7\0\1\12"+
@@ -263,186 +387,128 @@ public final class UAX29URLEmailTokenizerImpl {
     "\2\0\1\12\6\0\1\12\1\0\2\12\1\0\5\12"+
     "\3\0\2\12\2\0\5\12\1\0\14\12\1\0\1\12"+
     "\2\0\3\12\5\0\2\12\1\0\2\12\2\0\2\12"+
-    "\26\0\3\12\3\0\1\12\2\0\2\4\10\0\1\12"+
-    "\373\2\1\0\3\2\374\0\2\4\43\0\2\13\2\14"+
-    "\2\13\1\14\1\13\1\14\1\13\7\14\2\13\1\14"+
-    "\1\13\3\15\1\14\1\2\1\12\1\2\1\12\2\2"+
-    "\2\14\1\2\26\14\3\2\5\14\3\2\4\14\2\2"+
-    "\7\14\2\12\4\14\1\2\6\14\1\12\2\14\1\12"+
-    "\4\14\5\2\1\12\2\2\3\14\1\12\13\14\1\12"+
-    "\12\14\6\2\1\14\3\2\13\14\5\2\1\12\4\2"+
-    "\2\14\3\2\1\12\14\2\1\14\4\2\1\12\4\2"+
-    "\1\14\1\12\2\2\1\12\1\2\2\14\3\2\11\14"+
-    "\1\2\2\14\1\2\1\14\1\2\3\14\1\12\4\14"+
-    "\1\12\2\14\2\2\2\14\1\2\1\14\1\2\1\12"+
-    "\1\14\5\2\2\14\4\2\1\12\1\14\3\12\21\14"+
-    "\1\12\6\14\1\2\7\14\1\12\5\14\2\12\4\2"+
-    "\12\14\4\2\4\14\1\12\6\14\1\12\2\2\12\14"+
-    "\4\2\4\14\5\2\6\14\7\2\5\14\1\12\6\14"+
-    "\1\12\2\14\1\2\1\12\2\2\4\14\1\12\1\14"+
-    "\1\12\3\14\5\2\1\12\1\2\5\14\1\12\3\14"+
-    "\1\12\2\14\1\12\5\14\3\2\3\14\1\12\20\14"+
-    "\1\12\16\14\1\12\6\2\3\14\1\12\2\14\1\12"+
-    "\3\14\11\2\1\14\3\2\3\14\1\2\2\14\1\12"+
-    "\2\2\1\0\13\2\1\12\3\2\1\12\1\2\5\14"+
-    "\3\2\1\0\1\2\2\0\1\13\5\14\1\13\1\14"+
-    "\1\0\1\12\1\0\1\12\2\0\2\14\1\0\26\14"+
-    "\3\0\5\14\3\0\4\14\2\0\7\14\2\12\4\14"+
-    "\1\0\6\14\1\12\2\14\1\12\4\14\5\0\1\12"+
-    "\2\0\3\14\1\12\13\14\1\12\12\14\6\0\1\14"+
-    "\3\0\13\14\5\0\1\12\4\0\2\14\3\0\1\12"+
-    "\14\0\1\14\4\0\1\12\4\0\1\14\1\12\2\0"+
-    "\1\12\1\0\2\14\3\0\11\14\1\0\2\14\1\0"+
-    "\1\14\1\0\3\14\1\12\4\14\1\12\2\14\2\0"+
-    "\2\14\1\0\1\14\1\0\1\12\1\14\5\0\2\14"+
-    "\4\0\1\12\1\14\3\12\21\14\1\12\6\14\1\0"+
-    "\7\14\1\12\5\14\2\12\4\0\12\14\4\0\4\14"+
-    "\1\12\6\14\1\12\2\0\12\14\4\0\4\14\5\0"+
-    "\6\14\7\0\5\14\1\12\6\14\1\12\2\14\1\0"+
-    "\1\12\2\0\4\14\1\12\1\14\1\12\3\14\5\0"+
-    "\1\12\1\0\5\14\1\12\3\14\1\12\2\14\1\12"+
-    "\5\14\3\0\3\14\1\12\20\14\1\12\16\14\1\12"+
-    "\6\0\3\14\1\12\2\14\1\12\3\14\11\0\1\14"+
-    "\3\0\3\14\1\0\2\14\1\12\15\0\1\12\3\0"+
-    "\1\12\1\0\5\14\2\0\3\4\15\0\3\15\1\13"+
-    "\u01c1\2\1\0\31\2\1\13\u01d8\0\3\4\16\0\1\11"+
-    "\2\0\2\11\1\0\10\11\1\0\2\11\1\0\4\11"+
-    "\1\0\1\11\1\0\2\11\2\0\6\11\2\0\1\11"+
-    "\1\0\1\11\1\0\11\11\3\0\3\11\1\0\3\11"+
-    "\2\0\2\11\1\0\3\11\7\0\1\11\10\0\1\11"+
-    "\4\0\1\11\1\0\2\11\1\0\2\11\2\0\1\11"+
-    "\1\0\1\11\1\0\3\11\3\0\1\11\1\0\1\11"+
-    "\1\0\1\11\1\0\1\11\2\0\1\11\2\0\2\11"+
-    "\1\0\3\11\1\0\5\11\1\0\1\11\1\0\1\11"+
-    "\3\0\4\11\1\0\6\11\4\0\1\11\2\0\1\11"+
-    "\6\0\1\11\1\0\2\11\1\0\5\11\3\0\2\11"+
-    "\2\0\5\11\1\0\14\11\1\0\1\11\2\0\3\11"+
-    "\5\0\2\11\1\0\2\11\2\0\2\11\26\0\3\11"+
-    "\3\0\1\11\1\0\1\13\1\14\1\13\32\14\2\15"+
-    "\1\0\2\15\1\0\2\15\1\0\1\15\1\2\2\14"+
-    "\24\2\1\12\14\2\1\12\11\2\2\14\2\12\10\2"+
-    "\3\14\1\12\1\14\7\2\1\12\1\2\2\14\21\2"+
-    "\1\12\24\2\1\12\1\14\5\2\2\14\12\2\1\12"+
-    "\4\2\1\14\3\2\1\14\1\2\1\14\15\2\1\12"+
-    "\2\14\5\2\1\14\6\2\1\14\13\2\2\14\1\12"+
-    "\2\2\4\14\1\2\1\12\20\2\4\14\5\2\1\12"+
-    "\3\2\1\12\1\2\2\14\4\2\1\12\1\2\1\14"+
-    "\5\2\1\12\4\2\1\14\3\2\1\14\1\12\11\2"+
-    "\1\12\4\2\1\12\7\2\1\14\3\2\2\12\1\14"+
-    "\2\2\1\14\12\2\1\14\5\2\2\14\1\2\3\14"+
-    "\1\2\1\12\1\14\4\2\1\12\2\2\1\12\5\2"+
-    "\1\14\3\2\1\12\1\2\1\12\20\2\1\12\6\2"+
-    "\1\14\1\12\1\2\1\12\1\2\1\14\21\2\1\0"+
-    "\4\2\1\12\4\2\1\14\7\2\1\0\1\2\2\0"+
-    "\32\14\1\0\2\14\24\0\1\12\14\0\1\12\11\0"+
-    "\2\14\2\12\10\0\3\14\1\12\1\14\7\0\1\12"+
-    "\1\0\2\14\21\0\1\12\24\0\1\12\1\14\5\0"+
-    "\2\14\12\0\1\12\4\0\1\14\3\0\1\14\1\0"+
-    "\1\14\15\0\1\12\2\14\5\0\1\14\6\0\1\14"+
-    "\13\0\2\14\1\12\2\0\4\14\1\0\1\12\20\0"+
-    "\4\14\5\0\1\12\3\0\1\12\1\0\2\14\4\0"+
-    "\1\12\1\0\1\14\5\0\1\12\4\0\1\14\3\0"+
-    "\1\14\1\12\11\0\1\12\4\0\1\12\7\0\1\14"+
-    "\3\0\2\12\1\14\2\0\1\14\12\0\1\14\5\0"+
-    "\2\14\1\0\3\14\1\0\1\12\1\14\4\0\1\12"+
-    "\2\0\1\12\5\0\1\14\3\0\1\12\1\0\1\12"+
-    "\20\0\1\12\6\0\1\14\1\12\1\0\1\12\1\0"+
-    "\1\14\25\0\1\12\4\0\1\14\10\0\2\4\20\0"+
-    "\u0155\2\1\0\20\2\u0165\0\2\4\15\0\1\11\4\0"+
-    "\1\11\1\0\1\11\60\0\2\11\10\0\1\11\2\0"+
-    "\1\11\11\0\1\11\5\0\1\11\13\0\1\11\42\0"+
-    "\1\11\11\0\1\11\21\0\1\11\5\0\1\11\2\0"+
-    "\1\11\26\0\1\11\4\0\1\11\7\0\1\11\14\0"+
-    "\1\11\1\0\3\11\20\0\1\11\15\0\1\11\5\0"+
-    "\2\11\24\0\1\11\4\0\1\11\52\0\1\11\6\0"+
-    "\1\11\3\0\1\11\5\0\1\11\1\0\1\11\10\0"+
-    "\1\11\6\0\1\11\3\0\1\11\2\0\1\11\13\0"+
-    "\1\11\16\0\1\11\16\0\1\11\11\0\1\11\2\0"+
-    "\1\11\24\0\1\11\16\0\1\11\3\0\1\11\7\0"+
-    "\1\15\3\0\22\2\1\14\7\2\1\14\6\2\1\14"+
-    "\2\2\1\12\3\2\1\14\14\2\1\14\1\12\10\2"+
-    "\1\14\7\2\1\12\17\2\1\12\4\2\1\14\22\2"+
-    "\1\14\17\2\1\14\1\2\1\14\6\2\1\14\1\2"+
-    "\2\14\6\2\1\14\16\2\2\14\16\2\1\14\5\2"+
-    "\1\14\4\2\1\14\17\2\1\14\10\2\41\0\1\12"+
-    "\1\14\7\2\33\0\1\14\7\0\1\14\6\0\1\14"+
-    "\2\0\1\12\3\0\1\14\14\0\1\14\1\12\10\0"+
-    "\1\14\7\0\1\12\17\0\1\12\4\0\1\14\22\0"+
-    "\1\14\17\0\1\14\1\0\1\14\6\0\1\14\1\0"+
-    "\2\14\6\0\1\14\16\0\2\14\16\0\1\14\5\0"+
-    "\1\14\4\0\1\14\17\0\1\14\10\0\1\12\1\14"+
-    "\7\0\3\12\20\0\312\2\41\0\11\2\323\0\3\4"+
-    "\42\0\1\11\11\0\1\11\12\0\2\11\12\0\1\11"+
-    "\10\0\1\11\22\0\1\11\23\0\1\11\22\0\1\11"+
-    "\23\0\1\11\31\0\1\11\7\0\1\11\26\0\1\11"+
-    "\3\0\1\11\6\0\1\11\7\0\1\11\11\0\1\11"+
-    "\11\0\1\11\4\0\1\11\13\0\2\11\27\0\1\11"+
-    "\5\0\1\11\2\0\1\11\10\0\1\11\1\0\1\11"+
-    "\20\0\1\11\5\0\1\11\1\0\1\11\24\0\1\11"+
-    "\13\0\1\15\24\2\1\14\10\2\1\14\4\2\1\12"+
-    "\5\2\1\14\1\2\1\12\3\2\1\14\22\2\1\12"+
-    "\17\2\1\12\2\2\1\12\34\2\142\0\4\2\1\0"+
-    "\1\15\6\0\1\2\3\0\1\15\6\0\1\15\25\0"+
-    "\1\14\10\0\1\14\4\0\1\12\5\0\1\14\1\0"+
-    "\1\12\3\0\1\14\22\0\1\12\17\0\1\12\2\0"+
-    "\1\12\37\0\1\13\1\14\1\13\1\14\2\13\4\14"+
-    "\14\0\151\2\142\0\4\2\154\0\1\13\2\4\56\0"+
-    "\1\11\20\0\1\11\20\0\1\11\16\0\1\11\227\0"+
-    "\1\11\10\0\1\15\23\2\2\14\1\2\1\14\11\2"+
-    "\1\12\2\2\2\14\1\12\4\2\1\14\3\2\1\12"+
-    "\11\2\142\0\2\2\5\0\1\15\10\0\2\2\42\0"+
-    "\2\14\1\0\1\14\11\0\1\12\2\0\2\14\1\12"+
-    "\4\0\1\14\3\0\1\12\13\0\1\14\14\0\62\2"+
-    "\142\0\2\2\137\0\1\11\7\0\1\11\24\0\1\11"+
-    "\17\0\1\11\2\0\1\11\176\0\1\15\20\2\1\14"+
-    "\5\2\160\0\2\2\1\15\54\0\1\14\23\0\24\2"+
-    "\157\0\2\2\76\0\1\11\3\0\1\11\7\0\1\11"+
-    "\155\0\6\2\1\12\10\2\63\0\1\12\42\0\1\2"+
-    "\1\15\7\0\1\15\2\0\1\16\37\0\1\12\24\0"+
-    "\16\2\126\0\1\2\237\0\12\2\41\0\1\14\22\0"+
-    "\1\2\1\15\130\0\12\2\63\0\1\2\31\0\1\11"+
-    "\73\0\1\11\43\0\5\2\5\0\1\12\14\0\1\12"+
-    "\26\0\1\15\16\0\1\16\2\0\2\16\1\0\10\16"+
-    "\1\0\2\16\1\0\4\16\1\0\1\16\1\0\2\16"+
-    "\2\0\6\16\2\0\1\16\1\0\1\16\1\0\11\16"+
-    "\3\0\3\16\1\0\3\16\2\0\2\16\1\0\3\16"+
-    "\7\0\1\16\10\0\1\16\4\0\1\16\1\0\2\16"+
-    "\1\0\2\16\2\0\1\16\1\0\1\16\1\0\3\16"+
-    "\3\0\1\16\1\0\1\16\1\0\1\16\1\0\1\16"+
-    "\2\0\1\16\2\0\2\16\1\0\3\16\1\0\5\16"+
-    "\1\0\1\16\1\0\1\16\3\0\4\16\1\0\6\16"+
-    "\4\0\1\16\2\0\1\16\6\0\1\16\1\0\2\16"+
-    "\1\0\5\16\3\0\2\16\2\0\5\16\1\0\14\16"+
-    "\1\0\1\16\2\0\3\16\5\0\2\16\1\0\2\16"+
-    "\2\0\2\16\26\0\3\16\3\0\1\16\54\0\4\2"+
-    "\164\0\1\2\14\0\1\14\20\0\1\15\15\0\1\16"+
-    "\4\0\1\16\1\0\1\16\60\0\2\16\10\0\1\16"+
-    "\2\0\1\16\11\0\1\16\5\0\1\16\13\0\1\16"+
-    "\42\0\1\16\11\0\1\16\21\0\1\16\5\0\1\16"+
-    "\2\0\1\16\26\0\1\16\4\0\1\16\7\0\1\16"+
-    "\14\0\1\16\1\0\3\16\20\0\1\16\15\0\1\16"+
-    "\5\0\2\16\24\0\1\16\4\0\1\16\52\0\1\16"+
-    "\6\0\1\16\3\0\1\16\5\0\1\16\1\0\1\16"+
-    "\10\0\1\16\6\0\1\16\3\0\1\16\2\0\1\16"+
-    "\13\0\1\16\16\0\1\16\16\0\1\16\11\0\1\16"+
-    "\2\0\1\16\24\0\1\16\16\0\1\16\3\0\1\16"+
-    "\57\0\1\2\61\0\1\11\14\0\1\11\26\0\1\2"+
-    "\62\0\1\16\11\0\1\16\12\0\2\16\12\0\1\16"+
-    "\10\0\1\16\22\0\1\16\23\0\1\16\22\0\1\16"+
-    "\23\0\1\16\31\0\1\16\7\0\1\16\26\0\1\16"+
-    "\3\0\1\16\6\0\1\16\7\0\1\16\11\0\1\16"+
-    "\11\0\1\16\4\0\1\16\13\0\2\16\27\0\1\16"+
-    "\5\0\1\16\2\0\1\16\10\0\1\16\1\0\1\16"+
-    "\20\0\1\16\5\0\1\16\1\0\1\16\24\0\1\16"+
-    "\13\0\1\15\42\0\1\2\67\0\1\2\70\0\1\16"+
-    "\20\0\1\16\20\0\1\16\16\0\1\16\227\0\1\16"+
-    "\45\0\1\2\41\0\1\2\61\0\1\16\7\0\1\16"+
-    "\24\0\1\16\17\0\1\16\2\0\1\16\325\0\1\16"+
-    "\3\0\1\16\7\0\1\16\u0144\0\1\16\73\0\1\16"+
-    "\273\0\1\16\14\0\1\16\224\0";
+    "\26\0\3\12\3\0\1\12\1\0\1\5\1\0\1\5"+
+    "\1\16\1\0\1\16\2\14\32\15\2\16\1\0\2\16"+
+    "\1\0\1\16\1\0\2\15\24\0\1\13\14\0\1\13"+
+    "\11\0\2\15\2\13\10\0\3\15\1\13\1\15\7\0"+
+    "\1\13\1\0\2\15\21\0\1\13\24\0\1\13\1\15"+
+    "\5\0\2\15\12\0\1\13\4\0\1\15\3\0\1\15"+
+    "\1\0\1\15\15\0\1\13\2\15\5\0\1\15\6\0"+
+    "\1\15\13\0\2\15\1\13\2\0\4\15\1\0\1\13"+
+    "\20\0\4\15\5\0\1\13\3\0\1\13\1\0\2\15"+
+    "\4\0\1\13\1\0\1\15\5\0\1\13\4\0\1\15"+
+    "\3\0\1\15\1\13\11\0\1\13\4\0\1\13\7\0"+
+    "\1\15\3\0\2\13\1\15\2\0\1\15\12\0\1\15"+
+    "\5\0\2\15\1\0\3\15\1\0\1\13\1\15\4\0"+
+    "\1\13\2\0\1\13\5\0\1\15\3\0\1\13\1\0"+
+    "\1\13\20\0\1\13\6\0\1\15\1\13\1\0\1\13"+
+    "\1\0\1\15\26\0\1\13\4\0\1\15\7\0\33\15"+
+    "\1\3\2\15\24\3\1\13\14\3\1\13\11\3\2\15"+
+    "\2\13\10\3\3\15\1\13\1\15\7\3\1\13\1\3"+
+    "\2\15\21\3\1\13\24\3\1\13\1\15\5\3\2\15"+
+    "\12\3\1\13\4\3\1\15\3\3\1\15\1\3\1\15"+
+    "\15\3\1\13\2\15\5\3\1\15\6\3\1\15\13\3"+
+    "\2\15\1\13\2\3\4\15\1\3\1\13\20\3\4\15"+
+    "\5\3\1\13\3\3\1\13\1\3\2\15\4\3\1\13"+
+    "\1\3\1\15\5\3\1\13\4\3\1\15\3\3\1\15"+
+    "\1\13\11\3\1\13\4\3\1\13\7\3\1\15\3\3"+
+    "\2\13\1\15\2\3\1\15\12\3\1\15\5\3\2\15"+
+    "\1\3\3\15\1\3\1\13\1\15\4\3\1\13\2\3"+
+    "\1\13\5\3\1\15\3\3\1\13\1\3\1\13\20\3"+
+    "\1\13\6\3\1\15\1\13\1\3\1\13\1\3\1\15"+
+    "\25\3\1\13\4\3\1\15\7\3\1\0\1\3\22\0"+
+    "\1\5\1\0\1\5\u0165\0\u0165\3\15\0\1\12\4\0"+
+    "\1\12\1\0\1\12\60\0\2\12\10\0\1\12\2\0"+
+    "\1\12\11\0\1\12\5\0\1\12\13\0\1\12\42\0"+
+    "\1\12\11\0\1\12\21\0\1\12\5\0\1\12\2\0"+
+    "\1\12\26\0\1\12\4\0\1\12\7\0\1\12\14\0"+
+    "\1\12\1\0\3\12\20\0\1\12\15\0\1\12\5\0"+
+    "\2\12\24\0\1\12\4\0\1\12\52\0\1\12\6\0"+
+    "\1\12\3\0\1\12\5\0\1\12\1\0\1\12\10\0"+
+    "\1\12\6\0\1\12\3\0\1\12\2\0\1\12\13\0"+
+    "\1\12\16\0\1\12\16\0\1\12\11\0\1\12\2\0"+
+    "\1\12\24\0\1\12\16\0\1\12\3\0\1\12\7\0"+
+    "\3\13\1\0\1\16\24\0\1\15\7\0\1\15\6\0"+
+    "\1\15\2\0\1\13\3\0\1\15\14\0\1\15\1\13"+
+    "\10\0\1\15\7\0\1\13\17\0\1\13\4\0\1\15"+
+    "\22\0\1\15\17\0\1\15\1\0\1\15\6\0\1\15"+
+    "\1\0\2\15\6\0\1\15\16\0\2\15\16\0\1\15"+
+    "\5\0\1\15\4\0\1\15\17\0\1\15\51\0\1\13"+
+    "\1\15\7\0\22\3\1\15\7\3\1\15\6\3\1\15"+
+    "\2\3\1\13\3\3\1\15\14\3\1\15\1\13\10\3"+
+    "\1\15\7\3\1\13\17\3\1\13\4\3\1\15\22\3"+
+    "\1\15\17\3\1\15\1\3\1\15\6\3\1\15\1\3"+
+    "\2\15\6\3\1\15\16\3\2\15\16\3\1\15\5\3"+
+    "\1\15\4\3\1\15\17\3\1\15\10\3\1\13\1\15"+
+    "\7\3\31\0\3\5\364\0\323\3\42\0\1\12\11\0"+
+    "\1\12\12\0\2\12\12\0\1\12\10\0\1\12\22\0"+
+    "\1\12\23\0\1\12\22\0\1\12\23\0\1\12\31\0"+
+    "\1\12\7\0\1\12\26\0\1\12\3\0\1\12\6\0"+
+    "\1\12\7\0\1\12\11\0\1\12\11\0\1\12\4\0"+
+    "\1\12\13\0\2\12\27\0\1\12\5\0\1\12\2\0"+
+    "\1\12\10\0\1\12\1\0\1\12\20\0\1\12\5\0"+
+    "\1\12\1\0\1\12\24\0\1\12\13\0\1\14\2\15"+
+    "\4\14\4\15\1\16\24\0\1\15\10\0\1\15\4\0"+
+    "\1\13\5\0\1\15\1\0\1\13\3\0\1\15\22\0"+
+    "\1\13\17\0\1\13\2\0\1\13\201\0\24\3\1\15"+
+    "\10\3\1\15\4\3\1\13\5\3\1\15\1\3\1\13"+
+    "\3\3\1\15\22\3\1\13\17\3\1\13\2\3\1\13"+
+    "\40\3\1\16\7\0\1\3\3\0\1\16\6\0\1\16"+
+    "\15\0\1\5\1\14\1\5\316\0\155\3\56\0\1\12"+
+    "\20\0\1\12\20\0\1\12\16\0\1\12\227\0\1\12"+
+    "\10\0\1\15\1\16\23\0\2\15\1\0\1\15\11\0"+
+    "\1\13\2\0\2\15\1\13\4\0\1\15\3\0\1\13"+
+    "\155\0\23\3\2\15\1\3\1\15\11\3\1\13\2\3"+
+    "\2\15\1\13\4\3\1\15\3\3\1\13\13\3\5\0"+
+    "\1\16\6\0\1\3\2\0\1\3\261\0\64\3\53\0"+
+    "\1\12\7\0\1\12\24\0\1\12\17\0\1\12\2\0"+
+    "\1\12\176\0\1\16\20\0\1\15\167\0\20\3\1\15"+
+    "\7\3\1\16\255\0\26\3\50\0\1\12\3\0\1\12"+
+    "\7\0\1\12\163\0\1\13\76\0\1\13\40\0\6\3"+
+    "\1\13\11\3\1\16\7\0\1\16\2\0\1\17\211\0"+
+    "\17\3\275\0\1\15\21\0\13\3\1\16\213\0\13\3"+
+    "\16\0\1\12\76\0\1\12\54\0\1\13\14\0\1\13"+
+    "\24\0\5\3\1\16\16\0\1\17\2\0\2\17\1\0"+
+    "\10\17\1\0\2\17\1\0\4\17\1\0\1\17\1\0"+
+    "\2\17\2\0\6\17\2\0\1\17\1\0\1\17\1\0"+
+    "\11\17\3\0\3\17\1\0\3\17\2\0\2\17\1\0"+
+    "\3\17\7\0\1\17\10\0\1\17\4\0\1\17\1\0"+
+    "\2\17\1\0\2\17\2\0\1\17\1\0\1\17\1\0"+
+    "\3\17\3\0\1\17\1\0\1\17\1\0\1\17\1\0"+
+    "\1\17\2\0\1\17\2\0\2\17\1\0\3\17\1\0"+
+    "\5\17\1\0\1\17\1\0\1\17\3\0\4\17\1\0"+
+    "\6\17\4\0\1\17\2\0\1\17\6\0\1\17\1\0"+
+    "\2\17\1\0\5\17\3\0\2\17\2\0\5\17\1\0"+
+    "\14\17\1\0\1\17\2\0\3\17\5\0\2\17\1\0"+
+    "\2\17\2\0\2\17\26\0\3\17\3\0\1\17\124\0"+
+    "\4\3\125\0\1\15\17\0\1\3\1\16\15\0\1\17"+
+    "\4\0\1\17\1\0\1\17\60\0\2\17\10\0\1\17"+
+    "\2\0\1\17\11\0\1\17\5\0\1\17\13\0\1\17"+
+    "\42\0\1\17\11\0\1\17\21\0\1\17\5\0\1\17"+
+    "\2\0\1\17\26\0\1\17\4\0\1\17\7\0\1\17"+
+    "\14\0\1\17\1\0\3\17\20\0\1\17\15\0\1\17"+
+    "\5\0\2\17\24\0\1\17\4\0\1\17\52\0\1\17"+
+    "\6\0\1\17\3\0\1\17\5\0\1\17\1\0\1\17"+
+    "\10\0\1\17\6\0\1\17\3\0\1\17\2\0\1\17"+
+    "\13\0\1\17\16\0\1\17\16\0\1\17\11\0\1\17"+
+    "\2\0\1\17\24\0\1\17\16\0\1\17\3\0\1\17"+
+    "\112\0\1\3\27\0\1\12\14\0\1\12\46\0\1\3"+
+    "\41\0\1\17\11\0\1\17\12\0\2\17\12\0\1\17"+
+    "\10\0\1\17\22\0\1\17\23\0\1\17\22\0\1\17"+
+    "\23\0\1\17\31\0\1\17\7\0\1\17\26\0\1\17"+
+    "\3\0\1\17\6\0\1\17\7\0\1\17\11\0\1\17"+
+    "\11\0\1\17\4\0\1\17\13\0\2\17\27\0\1\17"+
+    "\5\0\1\17\2\0\1\17\10\0\1\17\1\0\1\17"+
+    "\20\0\1\17\5\0\1\17\1\0\1\17\24\0\1\17"+
+    "\13\0\1\16\63\0\1\3\60\0\1\3\56\0\1\17"+
+    "\20\0\1\17\20\0\1\17\16\0\1\17\227\0\1\17"+
+    "\57\0\1\3\35\0\1\3\53\0\1\17\7\0\1\17"+
+    "\24\0\1\17\17\0\1\17\2\0\1\17\324\0\1\17"+
+    "\3\0\1\17\7\0\1\17\u0144\0\1\17\76\0\1\17"+
+    "\272\0\1\17\14\0\1\17\222\0";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[12851];
+    int [] result = new int[12892];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -467,1616 +533,1621 @@ public final class UAX29URLEmailTokenizerImpl {
   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
 
   private static final String ZZ_ROWMAP_PACKED_0 =
-    "\0\0\0\124\0\250\0\374\0\u0150\0\u01a4\0\u01f8\0\u024c"+
-    "\0\u02a0\0\u02f4\0\u0348\0\u039c\0\u03f0\0\u0444\0\u0498\0\u04ec"+
-    "\0\u0540\0\u0594\0\u05e8\0\u063c\0\u0690\0\u06e4\0\u0738\0\u078c"+
-    "\0\u07e0\0\u0834\0\u0888\0\u08dc\0\u0930\0\u0984\0\u09d8\0\u0a2c"+
-    "\0\u0a80\0\u0ad4\0\u0b28\0\u0b7c\0\u0bd0\0\u024c\0\u0c24\0\u0348"+
-    "\0\u0c78\0\u0ccc\0\u039c\0\u0d20\0\u0d74\0\u0dc8\0\u0e1c\0\u0444"+
-    "\0\u0e70\0\u0ec4\0\u0f18\0\u0f6c\0\u0fc0\0\u1014\0\u1068\0\u10bc"+
-    "\0\u1110\0\u1164\0\u11b8\0\u120c\0\u1260\0\u12b4\0\u1308\0\u135c"+
-    "\0\u13b0\0\u0738\0\u1404\0\u1458\0\u14ac\0\u1500\0\u1554\0\u15a8"+
-    "\0\u15fc\0\u1650\0\u16a4\0\u16f8\0\u174c\0\u17a0\0\u17f4\0\u1848"+
-    "\0\u189c\0\u18f0\0\u1944\0\u1998\0\u19ec\0\u1a40\0\u1a94\0\u1ae8"+
-    "\0\u1b3c\0\u1b90\0\u1be4\0\u1c38\0\u1c8c\0\u1ce0\0\u1d34\0\u1d88"+
-    "\0\u1ddc\0\u1e30\0\u1e84\0\u1ed8\0\u1f2c\0\u1f80\0\u1fd4\0\u2028"+
-    "\0\u207c\0\u20d0\0\u2124\0\u2178\0\u21cc\0\u2220\0\u2274\0\u22c8"+
-    "\0\u231c\0\u2370\0\u23c4\0\u2418\0\u246c\0\u24c0\0\u2514\0\u2568"+
-    "\0\u25bc\0\u2610\0\u2664\0\u26b8\0\u270c\0\u2760\0\u27b4\0\u2808"+
-    "\0\u285c\0\u28b0\0\u2904\0\u2958\0\u29ac\0\u2a00\0\u2a54\0\u2aa8"+
-    "\0\u2afc\0\u2b50\0\u2ba4\0\u2bf8\0\u2c4c\0\u2ca0\0\u2cf4\0\u2d48"+
-    "\0\u2d9c\0\u2df0\0\u2e44\0\u2e98\0\u2eec\0\u2f40\0\u2f94\0\u2fe8"+
-    "\0\u303c\0\u3090\0\u30e4\0\u3138\0\u318c\0\u31e0\0\u3234\0\u3288"+
-    "\0\u32dc\0\u3330\0\u3384\0\u33d8\0\u342c\0\u3480\0\u34d4\0\u3528"+
-    "\0\u357c\0\u35d0\0\u3624\0\u3678\0\u36cc\0\u3720\0\u3774\0\u37c8"+
-    "\0\u381c\0\u3870\0\u38c4\0\u3918\0\u396c\0\u39c0\0\u3a14\0\u3a68"+
-    "\0\u3abc\0\u3b10\0\u3b64\0\u3bb8\0\u3c0c\0\u3c60\0\u3cb4\0\u3d08"+
-    "\0\u3d5c\0\u3db0\0\u3e04\0\u3e58\0\u3eac\0\u3f00\0\u3f54\0\u3fa8"+
-    "\0\u3ffc\0\u4050\0\u40a4\0\u40f8\0\u414c\0\u41a0\0\u41f4\0\u4248"+
-    "\0\u429c\0\u42f0\0\u4344\0\u4398\0\u43ec\0\u4440\0\u4494\0\u44e8"+
-    "\0\u453c\0\u4590\0\u45e4\0\u4638\0\250\0\u468c\0\u46e0\0\u4734"+
-    "\0\u4788\0\u47dc\0\u4830\0\u4884\0\u48d8\0\u492c\0\u4980\0\u49d4"+
-    "\0\u4a28\0\u4a7c\0\u4ad0\0\u4b24\0\u4b78\0\u4bcc\0\u4c20\0\u4c74"+
-    "\0\u4cc8\0\u4d1c\0\u4d70\0\u4dc4\0\u4e18\0\u4e6c\0\u4ec0\0\u4f14"+
-    "\0\u4f68\0\u4fbc\0\u5010\0\u5064\0\u50b8\0\u510c\0\u5160\0\u51b4"+
-    "\0\u5208\0\u525c\0\u52b0\0\u5304\0\u5358\0\u53ac\0\u5400\0\u5454"+
-    "\0\u54a8\0\u54fc\0\u5550\0\u55a4\0\u55f8\0\u564c\0\u56a0\0\u56f4"+
-    "\0\u5748\0\u579c\0\u57f0\0\u5844\0\u5898\0\u58ec\0\u5940\0\u5994"+
-    "\0\u59e8\0\u5a3c\0\u5a90\0\u5ae4\0\u5b38\0\u5b8c\0\u5be0\0\u5c34"+
-    "\0\u5c88\0\u5cdc\0\u5d30\0\u5d84\0\u5dd8\0\u5e2c\0\u5e80\0\u5ed4"+
-    "\0\u5f28\0\u5f7c\0\u5fd0\0\u6024\0\u6078\0\u60cc\0\u6120\0\u6174"+
-    "\0\u61c8\0\u621c\0\u6270\0\u62c4\0\u6318\0\u636c\0\u63c0\0\u6414"+
-    "\0\u6468\0\u64bc\0\u6510\0\u6564\0\u65b8\0\u660c\0\u6660\0\u66b4"+
-    "\0\u6708\0\u675c\0\u67b0\0\u6804\0\u6858\0\u68ac\0\u6900\0\u6954"+
-    "\0\u69a8\0\u69fc\0\u6a50\0\u6aa4\0\u6af8\0\u6b4c\0\u6ba0\0\u6bf4"+
-    "\0\u6c48\0\u6c9c\0\u6cf0\0\u6d44\0\u6d98\0\u6dec\0\u6e40\0\u6e94"+
-    "\0\u6ee8\0\u6f3c\0\u6f90\0\u6fe4\0\u7038\0\u708c\0\u70e0\0\u7134"+
-    "\0\u7188\0\u71dc\0\u7230\0\u7284\0\u72d8\0\u732c\0\u7380\0\u73d4"+
-    "\0\u7428\0\u747c\0\u74d0\0\u7524\0\u7578\0\u75cc\0\u7620\0\u7674"+
-    "\0\u76c8\0\u771c\0\u7770\0\u77c4\0\u7818\0\u786c\0\u78c0\0\u7914"+
-    "\0\u7968\0\u79bc\0\u7a10\0\u7a64\0\u7ab8\0\u7b0c\0\u7b60\0\u7bb4"+
-    "\0\u7c08\0\u7c5c\0\u7cb0\0\u7d04\0\u7d58\0\u7dac\0\u7e00\0\u7e54"+
-    "\0\u7ea8\0\u7efc\0\u7f50\0\u7fa4\0\u7ff8\0\u804c\0\u80a0\0\u80f4"+
-    "\0\u8148\0\u819c\0\u81f0\0\u8244\0\u8298\0\u82ec\0\u8340\0\u8394"+
-    "\0\u83e8\0\u843c\0\u8490\0\u84e4\0\u8538\0\u858c\0\u85e0\0\u8634"+
-    "\0\u8688\0\u86dc\0\u8730\0\u8784\0\u87d8\0\u882c\0\u8880\0\u88d4"+
-    "\0\u8928\0\u897c\0\u89d0\0\u8a24\0\u8a78\0\u8acc\0\u8b20\0\u8b74"+
-    "\0\u8bc8\0\u8c1c\0\u8c70\0\u8cc4\0\u8d18\0\u8d6c\0\u8dc0\0\u8e14"+
-    "\0\u8e68\0\u8ebc\0\u8f10\0\u8f64\0\u8fb8\0\u900c\0\u9060\0\u90b4"+
-    "\0\u9108\0\u915c\0\u91b0\0\u9204\0\u9258\0\u92ac\0\u9300\0\u9354"+
-    "\0\u93a8\0\u93fc\0\u9450\0\u94a4\0\u94f8\0\u954c\0\u95a0\0\u95f4"+
-    "\0\u9648\0\u969c\0\u96f0\0\u9744\0\u9798\0\u97ec\0\u9840\0\u9894"+
-    "\0\u98e8\0\u993c\0\u9990\0\u99e4\0\u9a38\0\u9a8c\0\u9ae0\0\u9b34"+
-    "\0\u9b88\0\u9bdc\0\u9c30\0\u9c84\0\u9cd8\0\u9d2c\0\u9d80\0\u9dd4"+
-    "\0\u9e28\0\u9e7c\0\u9ed0\0\u9f24\0\u9f78\0\u9fcc\0\ua020\0\ua074"+
-    "\0\ua0c8\0\ua11c\0\ua170\0\ua1c4\0\ua218\0\ua26c\0\ua2c0\0\ua314"+
-    "\0\ua368\0\ua3bc\0\ua410\0\ua464\0\ua4b8\0\ua50c\0\ua560\0\ua5b4"+
-    "\0\ua608\0\ua65c\0\ua6b0\0\ua704\0\ua758\0\ua7ac\0\ua800\0\ua854"+
-    "\0\ua8a8\0\ua8fc\0\ua950\0\ua9a4\0\ua9f8\0\uaa4c\0\uaaa0\0\uaaf4"+
-    "\0\uab48\0\uab9c\0\uabf0\0\uac44\0\uac98\0\uacec\0\uad40\0\uad94"+
-    "\0\uade8\0\uae3c\0\uae90\0\uaee4\0\uaf38\0\uaf8c\0\uafe0\0\ub034"+
-    "\0\ub088\0\ub0dc\0\ub130\0\ub184\0\ub1d8\0\ub22c\0\ub280\0\ub2d4"+
-    "\0\ub328\0\ub37c\0\ub3d0\0\ub424\0\ub478\0\ub4cc\0\ub520\0\ub574"+
-    "\0\ub5c8\0\ub61c\0\ub670\0\ub6c4\0\ub718\0\ub76c\0\ub7c0\0\ub814"+
-    "\0\ub868\0\ub8bc\0\ub910\0\ub964\0\ub9b8\0\uba0c\0\uba60\0\ubab4"+
-    "\0\ubb08\0\ubb5c\0\ubbb0\0\ubc04\0\ubc58\0\ubcac\0\ubd00\0\ubd54"+
-    "\0\ubda8\0\ubdfc\0\ube50\0\ubea4\0\ubef8\0\ubf4c\0\ubfa0\0\ubff4"+
-    "\0\uc048\0\uc09c\0\uc0f0\0\uc144\0\uc198\0\uc1ec\0\uc240\0\uc294"+
-    "\0\uc2e8\0\uc33c\0\uc390\0\uc3e4\0\uc438\0\uc48c\0\uc4e0\0\uc534"+
-    "\0\uc588\0\uc5dc\0\uc630\0\uc684\0\uc6d8\0\uc72c\0\uc780\0\uc7d4"+
-    "\0\uc828\0\uc87c\0\uc8d0\0\uc924\0\uc978\0\uc9cc\0\uca20\0\uca74"+
-    "\0\ucac8\0\ucb1c\0\ucb70\0\ucbc4\0\ucc18\0\ucc6c\0\uccc0\0\ucd14"+
-    "\0\ucd68\0\ucdbc\0\uce10\0\uce64\0\uceb8\0\ucf0c\0\ucf60\0\ucfb4"+
-    "\0\ud008\0\ud05c\0\ud0b0\0\ud104\0\ud158\0\ud1ac\0\ud200\0\ud254"+
-    "\0\ud2a8\0\ud2fc\0\ud350\0\ud3a4\0\ud3f8\0\ud44c\0\ud4a0\0\ud4f4"+
-    "\0\ud548\0\ud59c\0\ud5f0\0\ud644\0\ud698\0\ud6ec\0\ud740\0\ud794"+
-    "\0\ud7e8\0\ud83c\0\ud890\0\ud8e4\0\ud938\0\ud98c\0\ud9e0\0\uda34"+
-    "\0\uda88\0\udadc\0\udb30\0\udb84\0\udbd8\0\udc2c\0\udc80\0\udcd4"+
-    "\0\udd28\0\udd7c\0\uddd0\0\ude24\0\ude78\0\udecc\0\udf20\0\udf74"+
-    "\0\udfc8\0\ue01c\0\ue070\0\ue0c4\0\ue118\0\ue16c\0\ue1c0\0\ue214"+
-    "\0\ue268\0\ue2bc\0\ue310\0\ue364\0\ue3b8\0\ue40c\0\ue460\0\ue4b4"+
-    "\0\ue508\0\ue55c\0\ue5b0\0\ue604\0\ue658\0\ue6ac\0\ue700\0\ue754"+
-    "\0\ue7a8\0\ue7fc\0\ue850\0\ue8a4\0\ue8f8\0\ue94c\0\ue9a0\0\ue9f4"+
-    "\0\uea48\0\uea9c\0\ueaf0\0\ueb44\0\ueb98\0\uebec\0\uec40\0\uec94"+
-    "\0\uece8\0\ued3c\0\ued90\0\uede4\0\uee38\0\uee8c\0\ueee0\0\uef34"+
-    "\0\uef88\0\uefdc\0\uf030\0\uf084\0\uf0d8\0\uf12c\0\uf180\0\uf1d4"+
-    "\0\uf228\0\uf27c\0\uf2d0\0\uf324\0\uf378\0\uf3cc\0\uf420\0\uf474"+
-    "\0\uf4c8\0\uf51c\0\uf570\0\uf5c4\0\uf618\0\uf66c\0\uf6c0\0\uf714"+
-    "\0\uf768\0\uf7bc\0\uf810\0\uf864\0\uf8b8\0\uf90c\0\uf960\0\uf9b4"+
-    "\0\ufa08\0\ufa5c\0\ufab0\0\ufb04\0\ufb58\0\ufbac\0\ufc00\0\ufc54"+
-    "\0\ufca8\0\ufcfc\0\ufd50\0\ufda4\0\ufdf8\0\ufe4c\0\ufea0\0\ufef4"+
-    "\0\uff48\0\uff9c\0\ufff0\1\104\1\230\1\354\1\u0140\1\u0194"+
-    "\1\u01e8\1\u023c\1\u0290\1\u02e4\1\u0338\1\u038c\1\u03e0\1\u0434"+
-    "\1\u0488\1\u04dc\1\u0530\1\u0584\1\u05d8\1\u062c\1\u0680\1\u06d4"+
-    "\1\u0728\1\u077c\1\u07d0\1\u0824\1\u0878\1\u08cc\1\u0920\1\u0974"+
-    "\1\u09c8\1\u0a1c\1\u0a70\1\u0ac4\1\u0b18\1\u0b6c\1\u0bc0\1\u0c14"+
-    "\1\u0c68\1\u0cbc\1\u0d10\1\u0d64\1\u0db8\1\u0e0c\1\u0e60\1\u0eb4"+
-    "\1\u0f08\1\u0f5c\1\u0fb0\1\u1004\1\u1058\1\u10ac\1\u1100\1\u1154"+
-    "\1\u11a8\1\u11fc\1\u1250\1\u12a4\1\u12f8\1\u134c\1\u13a0\1\u13f4"+
-    "\1\u1448\1\u149c\1\u14f0\1\u1544\1\u1598\1\u15ec\1\u1640\1\u1694"+
-    "\1\u16e8\1\u173c\1\u1790\1\u17e4\1\u1838\1\u188c\1\u18e0\1\u1934"+
-    "\1\u1988\1\u19dc\1\u1a30\1\u1a84\1\u1ad8\1\u1b2c\1\u1b80\1\u1bd4"+
-    "\1\u1c28\1\u1c7c\1\u1cd0\1\u1d24\1\u1d78\1\u1dcc\1\u1e20\1\u1e74"+
-    "\1\u1ec8\1\u1f1c\1\u1f70\1\u1fc4\1\u2018\1\u206c\1\u20c0\1\u2114"+
-    "\1\u2168\1\u21bc\1\u2210\1\u2264\1\u22b8\1\u230c\1\u2360\1\u23b4"+
-    "\1\u2408\1\u245c\1\u24b0\1\u2504\1\u2558\1\u25ac\1\u2600\1\u2654"+
-    "\1\u26a8\1\u26fc\1\u2750\1\u27a4\1\u27f8\1\u284c\1\u28a0\1\u28f4"+
-    "\1\u2948\1\u299c\1\u29f0\1\u2a44\1\u2a98\1\u2aec\1\u2b40\1\u2b94"+
-    "\1\u2be8\1\u2c3c\1\u2c90\1\u2ce4\1\u2d38\1\u2d8c\1\u2de0\1\u2e34"+
-    "\1\u2e88\1\u2edc\1\u2f30\1\u2f84\1\u2fd8\1\u302c\1\u3080\1\u30d4"+
-    "\1\u3128\1\u317c\1\u31d0\1\u3224\1\u3278\1\u32cc\1\u3320\1\u3374"+
-    "\1\u33c8\1\u341c\1\u3470\1\u34c4\1\u3518\1\u356c\1\u35c0\1\u3614"+
-    "\1\u3668\1\u36bc\1\u3710\1\u3764\1\u37b8\1\u380c\1\u3860\1\u38b4"+
-    "\1\u3908\1\u395c\1\u39b0\1\u3a04\1\u3a58\1\u3aac\1\u3b00\1\u3b54"+
-    "\1\u3ba8\1\u3bfc\1\u3c50\1\u3ca4\1\u3cf8\1\u3d4c\1\u3da0\1\u3df4"+
-    "\1\u3e48\1\u3e9c\1\u3ef0\1\u3f44\1\u3f98\1\u3fec\1\u4040\1\u4094"+
-    "\1\u40e8\1\u413c\1\u4190\1\u41e4\1\u4238\1\u428c\1\u42e0\1\u4334"+
-    "\1\u4388\1\u43dc\1\u4430\1\u4484\1\u44d8\1\u452c\1\u4580\1\u45d4"+
-    "\1\u4628\1\u467c\1\u46d0\1\u4724\1\u4778\1\u47cc\1\u4820\1\u4874"+
-    "\1\u48c8\1\u491c\1\u4970\1\u49c4\1\u4a18\1\u4a6c\1\u4ac0\1\u4b14"+
-    "\1\u4b68\1\u4bbc\1\u4c10\1\u4c64\1\u4cb8\1\u4d0c\1\u4d60\1\u4db4"+
-    "\1\u4e08\1\u4e5c\1\u4eb0\1\u4f04\1\u4f58\1\u4fac\1\u5000\1\u5054"+
-    "\1\u50a8\1\u50fc\1\u5150\1\u51a4\1\u51f8\1\u524c\1\u52a0\1\u52f4"+
-    "\1\u5348\1\u539c\1\u53f0\1\u5444\1\u5498\1\u54ec\1\u5540\1\u5594"+
-    "\1\u55e8\1\u563c\1\u5690\1\u56e4\1\u5738\1\u578c\1\u57e0\1\u5834"+
-    "\1\u5888\1\u58dc\1\u5930\1\u5984\1\u59d8\1\u5a2c\1\u5a80\1\u5ad4"+
-    "\1\u5b28\1\u5b7c\1\u5bd0\1\u5c24\1\u5c78\1\u5ccc\1\u5d20\1\u5d74"+
-    "\1\u5dc8\1\u5e1c\1\u5e70\1\u5ec4\1\u5f18\1\u5f6c\1\u5fc0\1\u6014"+
-    "\1\u6068\1\u60bc\1\u6110\1\u6164\1\u61b8\1\u620c\1\u6260\1\u62b4"+
-    "\1\u6308\1\u635c\1\u63b0\1\u6404\1\u6458\1\u64ac\1\u6500\1\u6554"+
-    "\1\u65a8\1\u65fc\1\u6650\1\u66a4\1\u66f8\1\u674c\1\u67a0\1\u67f4"+
-    "\1\u6848\1\u689c\1\u68f0\1\u6944\1\u6998\1\u69ec\1\u6a40\1\u6a94"+
-    "\1\u6ae8\1\u6b3c\1\u6b90\1\u6be4\1\u6c38\1\u6c8c\1\u6ce0\1\u6d34"+
-    "\1\u6d88\1\u6ddc\1\u6e30\1\u6e84\1\u6ed8\1\u6f2c\1\u6f80\1\u6fd4"+
-    "\1\u7028\1\u707c\1\u70d0\1\u7124\1\u7178\1\u71cc\1\u7220\1\u7274"+
-    "\1\u72c8\1\u731c\1\u7370\1\u73c4\1\u7418\1\u746c\1\u74c0\1\u7514"+
-    "\1\u7568\1\u75bc\1\u7610\1\u7664\1\u76b8\1\u770c\1\u7760\1\u77b4"+
-    "\1\u7808\1\u785c\1\u78b0\1\u7904\1\u7958\1\u79ac\1\u7a00\1\u7a54"+
-    "\1\u7aa8\1\u7afc\1\u7b50\1\u7ba4\1\u7bf8\1\u7c4c\1\u7ca0\1\u7cf4"+
-    "\1\u7d48\1\u7d9c\1\u7df0\1\u7e44\1\u7e98\1\u7eec\1\u7f40\1\u7f94"+
-    "\1\u7fe8\1\u803c\1\u8090\1\u80e4\1\u8138\1\u818c\1\u81e0\1\u8234"+
-    "\1\u8288\1\u82dc\1\u8330\1\u8384\1\u83d8\1\u842c\1\u8480\1\u84d4"+
-    "\1\u8528\1\u857c\1\u85d0\1\u8624\1\u8678\1\u86cc\1\u8720\1\u8774"+
-    "\1\u87c8\1\u881c\1\u8870\1\u88c4\1\u8918\1\u896c\1\u89c0\1\u8a14"+
-    "\1\u8a68\1\u8abc\1\u8b10\1\u8b64\1\u8bb8\1\u8c0c\1\u8c60\1\u8cb4"+
-    "\1\u8d08\1\u8d5c\1\u8db0\1\u8e04\1\u8e58\1\u8eac\1\u8f00\1\u8f54"+
-    "\1\u8fa8\1\u8ffc\1\u9050\1\u90a4\1\u90f8\1\u914c\1\u91a0\1\u91f4"+
-    "\1\u9248\1\u929c\1\u92f0\1\u9344\1\u9398\1\u93ec\1\u9440\1\u9494"+
-    "\1\u94e8\1\u953c\1\u9590\1\u95e4\1\u9638\1\u968c\1\u96e0\1\u9734"+
-    "\1\u9788\1\u97dc\1\u9830\1\u9884\1\u98d8\1\u992c\1\u9980\1\u99d4"+
-    "\1\u9a28\1\u9a7c\1\u9ad0\1\u9b24\1\u9b78\1\u9bcc\1\u9c20\1\u9c74"+
-    "\1\u9cc8\1\u9d1c\1\u9d70\1\u9dc4\1\u9e18\1\u9e6c\1\u9ec0\1\u9f14"+
-    "\1\u9f68\1\u9fbc\1\ua010\1\ua064\1\ua0b8\1\ua10c\1\ua160\1\ua1b4"+
-    "\1\ua208\1\ua25c\1\ua2b0\1\ua304\1\ua358\1\ua3ac\0\250\0\374"+
-    "\0\374\0\u0a80\0\u0a80\0\u0ad4\0\u0b28\0\u0ec4\0\u03f0\1\ua400"+
-    "\1\ua454\0\u04ec\0\u7d58\1\ua4a8\1\ua4fc\0\u0f6c\0\u0fc0\1\ua550"+
-    "\0\u0348\0\u1014\0\u0ccc\1\ua5a4\1\ua5f8\1\ua64c\0\250\1\ua6a0"+
-    "\1\ua6f4\1\ua748\1\ua79c\1\ua7f0\1\ua844\1\ua898\1\ua8ec\1\ua940"+
-    "\1\ua994\0\u5dd8\1\ua9e8\1\uaa3c\1\uaa90\1\uaae4\1\uab38\1\uab8c"+
-    "\1\uabe0\1\uac34\1\uac88\1\uacdc\0\u69fc\1\uad30\1\uad84\1\uadd8"+
-    "\1\uae2c\1\uae80\1\uaed4\1\uaf28\1\uaf7c\1\uafd0\1\ub024\1\ub078"+
-    "\1\ub0cc\1\ub120\1\ub174\1\ub1c8\1\ub21c\1\ub270\1\ub2c4\1\ub318"+
-    "\1\ub36c\0\u6708\1\ub3c0\1\ub414\1\ub468\1\ub4bc\1\ub510\1\ub564"+
-    "\1\ub5b8\1\ub60c\1\ub660\1\ub6b4\1\ub708\1\ub75c\1\ub7b0\1\ub804"+
-    "\1\ub858\1\ub8ac\0\u8880\1\ub900\1\ub954\0\u7bb4\1\ub9a8\0\u95f4"+
-    "\1\ub9fc\1\uba50\1\ubaa4\1\ubaf8\1\ubb4c\1\ubba0\1\ubbf4\1\ubc48"+
-    "\1\ubc9c\1\ubcf0\1\ubd44\1\ubd98\1\ubdec\1\ube40\1\ube94\1\ubee8"+
-    "\1\ubf3c\1\ubf90\1\ubfe4\1\uc038\1\uc08c\1\uc0e0\1\uc134\1\uc188"+
-    "\1\uc1dc\1\uc230\1\uc284\1\uc2d8\1\uc32c\1\uc380\1\uc3d4\1\uc428"+
-    "\1\uc47c\1\uc4d0\1\uc524\1\uc578\1\uc5cc\1\ub2c4\1\uc620\1\uc674"+
-    "\1\uc6c8\1\uc71c\1\uc770\1\uc7c4\1\uc818\1\uc86c\1\uc8c0\1\uc914"+
-    "\1\uc968\1\uc9bc\1\uca10\1\uca64\1\ucab8\1\ucb0c\1\ucb60\1\ucbb4"+
-    "\1\ucc08\1\ucc5c\1\uccb0\1\ucd04\1\ucd58\1\ucdac\1\uce00\1\uce54"+
-    "\1\ucea8\0\u80f4\1\ucefc\1\ucf50\1\ucfa4\1\ucff8\1\ud04c\1\ud0a0"+
-    "\1\ud0f4\1\ud148\1\ud19c\1\ud1f0\1\ud244\1\ud298\1\ud2ec\1\ud340"+
-    "\1\ud394\1\ud3e8\1\ud43c\1\ud490\1\ud4e4\1\ud538\1\ud58c\1\ud5e0"+
-    "\1\ud634\1\ud688\1\ud6dc\1\ud730\1\ud784\1\ud7d8\1\ud82c\1\ud880"+
-    "\1\ud8d4\1\ud928\1\ud97c\1\ud9d0\1\uda24\1\uda78\1\udacc\1\udb20"+
-    "\1\udb74\1\udbc8\1\udc1c\1\udc70\1\udcc4\1\udd18\1\udd6c\1\uddc0"+
-    "\1\ude14\1\ude68\1\udebc\1\udf10\1\udf64\1\udfb8\1\ue00c\1\ue060"+
-    "\1\ub318\1\ue0b4\1\ue108\1\ue15c\1\ue1b0\1\ue204\1\ue258\1\ue2ac"+
-    "\1\ue300\1\ue354\1\ue3a8\0\u5454\1\ue3fc\1\ue450\1\ue4a4\1\ue4f8"+
-    "\1\ue54c\1\ue5a0\1\ue5f4\1\ue648\1\ue69c\1\ue6f0\1\ue744\1\ue798"+
-    "\0\u8bc8\1\ucc08\1\ue7ec\1\ue840\1\ue894\1\ue8e8\1\ue93c\1\ue990"+
-    "\1\ue9e4\1\uea38\1\uea8c\1\ueae0\1\ueb34\1\ueb88\1\uebdc\1\uec30"+
-    "\1\uec84\1\uecd8\1\ued2c\1\ued80\1\uedd4\1\uee28\1\uee7c\1\ueed0"+
-    "\1\uef24\1\uef78\1\uefcc\1\uf020\1\uf074\1\uf0c8\1\uf11c\1\uf170"+
-    "\1\uf1c4\1\uf218\1\uf26c\1\uf2c0\1\uf314\1\ud19c\1\uf368\1\uf3bc"+
-    "\1\uf410\1\uf464\1\uf4b8\1\uf50c\1\uf560\1\ue354\1\uf5b4\1\uf608"+
-    "\1\uf65c\1\uf6b0\1\uf704\1\uf758\1\uf7ac\1\uf800\1\uf854\1\uf8a8"+
-    "\1\uf8fc\1\uf950\1\uf9a4\1\uf9f8\1\ufa4c\1\ufaa0\1\ufaf4\1\ufb48"+
-    "\1\ufb9c\1\ufbf0\1\ufc44\1\ufc98\1\ufcec\1\ufd40\1\ufd94\0\u7968"+
-    "\1\ufde8\1\ufe3c\1\ufe90\1\ue93c\1\ufee4\1\uff38\1\uff8c\1\uffe0"+
-    "\2\64\2\210\2\334\2\u0130\2\u0184\2\u01d8\2\u022c\2\u0280"+
-    "\2\u02d4\0\u6120\0\u9204\2\u0328\2\u037c\2\u03d0\2\u0424\2\u0478"+
-    "\2\u04cc\2\u0520\2\u0574\2\u05c8\2\u061c\2\u0670\2\u06c4\0\u621c"+
-    "\2\u0718\2\u076c\2\u07c0\2\u0814\2\u0868\2\u08bc\2\u0910\2\u0964"+
-    "\2\u09b8\2\u0a0c\2\u0a60\2\u0ab4\2\u0b08\2\u0b5c\2\u0bb0\2\u0c04"+
-    "\2\u0c58\2\u0cac\2\u0d00\2\u0d54\2\u0da8\2\u0dfc\2\u0e50\2\u0ea4"+
-    "\2\u0ef8\2\u0f4c\2\u0fa0\2\u0ff4\2\u1048\2\u109c\2\u10f0\2\u1144"+
-    "\2\u1198\2\u11ec\2\u1240\2\u1294\2\u12e8\2\u133c\2\u1390\2\u13e4"+
-    "\2\u1438\2\u148c\2\u14e0\2\u1534\2\u1588\2\u15dc\2\u1630\0\u81f0"+
-    "\2\u1684\2\u16d8\2\u172c\2\u1780\2\u17d4\2\u1828\2\u187c\2\u18d0"+
-    "\2\u1924\2\u1978\2\u19cc\2\u1a20\2\u1a74\2\u1ac8\2\u1b1c\2\u1b70"+
-    "\2\u1bc4\2\u1c18\2\u1c6c\2\u1cc0\2\u1d14\2\u1d68\2\u1dbc\2\u1e10"+
-    "\2\u1e64\2\u1eb8\2\u1f0c\2\u1f60\2\u1fb4\2\u2008\2\u205c\2\u20b0"+
-    "\2\u2104\2\u2158\2\u21ac\2\u2200\2\u2254\2\u22a8\2\u22fc\2\u2350"+
-    "\2\u23a4\2\u23f8\2\u244c\2\u24a0\2\u24f4\2\u2548\2\u259c\2\u25f0"+
-    "\0\u8244\2\u2644\2\u2698\2\u26ec\2\u2740\2\u2794\1\ubdec\2\u27e8"+
-    "\2\u283c\2\u2890\2\u28e4\2\u2938\2\u298c\2\u29e0\2\u2a34\2\u2a88"+
-    "\2\u2adc\2\u2b30\2\u2b84\2\u2bd8\2\u2c2c\2\u2c80\2\u2cd4\2\u2d28"+
-    "\2\u2d7c\2\u2dd0\2\u2e24\2\u2e78\2\u2ecc\2\u2f20\2\u2f74\2\u2fc8"+
-    "\2\u301c\2\u3070\2\u30c4\2\u3118\2\u316c\2\u31c0\2\u3214\2\u3268"+
-    "\2\u32bc\2\u1bc4\2\u3310\2\u3364\2\u33b8\2\u340c\2\u3460\2\u34b4"+
-    "\2\u09b8\2\u3508\2\u355c\2\u35b0\2\u3604\2\u3658\2\u36ac\2\u3700"+
-    "\2\u3754\2\u37a8\2\u37fc\2\u3850\2\u38a4\2\u38f8\2\u394c\2\u39a0"+
-    "\2\u39f4\2\u3a48\2\u3a9c\2\u3af0\2\u3b44\2\u3b98\2\u3bec\2\u3c40"+
-    "\2\u3c94\2\u3ce8\2\u3d3c\2\u3d90\2\u3de4\2\u3e38\2\u3e8c\0\u747c"+
-    "\2\u3ee0\2\u3f34\2\u3f88\2\u3fdc\2\u4030\2\u4084\2\u40d8\2\u412c"+
-    "\2\u4180\2\u41d4\2\u4228\2\u427c\2\u42d0\0\u2274\0\ud1ac\2\u4324"+
-    "\2\u4378\2\u43cc\0\u0348\2\u4420\2\u4474\2\u44c8\2\u451c\2\u4570"+
-    "\2\u45c4\2\u4618\2\u466c\2\u46c0\2\u4714\0\ub22c\2\u4768\2\u47bc"+
-    "\2\u4810\2\u4864\2\u48b8\2\u490c\2\u4960\2\u49b4\2\u4a08\2\u4a5c"+
-    "\0\ube50\2\u4ab0\2\u4b04\2\u4b58\2\u4bac\2\u4c00\2\u4c54\2\u4ca8"+
-    "\2\u4cfc\2\u4d50\2\u4da4\2\u4df8\2\u4e4c\2\u4ea0\2\u4ef4\2\u4f48"+
-    "\2\u4f9c\2\u4ff0\2\u5044\2\u5098\2\u50ec\0\ubb5c\2\u5140\2\u5194"+
-    "\2\u51e8\2\u523c\2\u5290\2\u52e4\2\u5338\2\u538c\2\u53e0\2\u5434"+
-    "\2\u5488\2\u54dc\2\u5530\2\u5584\2\u55d8\2\u562c\0\udcd4\2\u5680"+
-    "\2\u56d4\0\ud008\2\u5728\0\uea48\2\u577c\2\u57d0\2\u5824\2\u5878"+
-    "\2\u58cc\2\u5920\2\u5974\2\u59c8\2\u5a1c\2\u5a70\2\u5ac4\2\u5b18"+
-    "\2\u5b6c\2\u5bc0\2\u5c14\2\u5c68\2\u5cbc\2\u5d10\2\u5d64\2\u5db8"+
-    "\2\u5e0c\2\u5e60\2\u5eb4\2\u5f08\2\u5f5c\2\u5fb0\2\u6004\2\u6058"+
-    "\2\u60ac\2\u6100\2\u6154\2\u61a8\2\u61fc\2\u6250\2\u62a4\2\u62f8"+
-    "\2\u634c\2\u5044\2\u63a0\2\u63f4\2\u6448\2\u649c\2\u64f0\2\u6544"+
-    "\2\u6598\2\u65ec\2\u6640\2\u6694\2\u66e8\2\u673c\2\u6790\2\u67e4"+
-    "\2\u6838\2\u688c\2\u68e0\2\u6934\2\u6988\2\u69dc\2\u6a30\2\u6a84"+
-    "\2\u6ad8\2\u6b2c\2\u6b80\2\u6bd4\2\u6c28\0\ud548\2\u6c7c\2\u6cd0"+
-    "\2\u6d24\2\u6d78\2\u6dcc\2\u6e20\2\u6e74\2\u6ec8\2\u6f1c\2\u6f70"+
-    "\2\u6fc4\2\u7018\2\u706c\2\u70c0\2\u7114\2\u7168\2\u71bc\2\u7210"+
-    "\2\u7264\2\u72b8\2\u730c\2\u7360\2\u73b4\2\u7408\2\u745c\2\u74b0"+
-    "\2\u7504\2\u7558\2\u75ac\2\u7600\2\u7654\2\u76a8\2\u76fc\2\u7750"+
-    "\2\u77a4\2\u77f8\2\u784c\2\u78a0\2\u78f4\2\u7948\2\u799c\2\u79f0"+
-    "\2\u7a44\2\u7a98\2\u7aec\2\u7b40\2\u7b94\2\u7be8\2\u7c3c\2\u7c90"+
-    "\2\u7ce4\2\u7d38\2\u7d8c\2\u7de0\2\u5098\2\u7e34\2\u7e88\2\u7edc"+
-    "\2\u7f30\2\u7f84\2\u7fd8\2\u802c\2\u8080\2\u80d4\2\u8128\0\ua8a8"+
-    "\2\u817c\2\u81d0\2\u8224\2\u8278\2\u82cc\2\u8320\2\u8374\2\u83c8"+
-    "\2\u841c\2\u8470\2\u84c4\2\u8518\0\ue01c\2\u6988\2\u856c\2\u85c0"+
-    "\2\u8614\2\u8668\2\u86bc\2\u8710\2\u8764\2\u87b8\2\u880c\2\u8860"+
-    "\2\u88b4\2\u8908\2\u895c\2\u89b0\2\u8a04\2\u8a58\2\u8aac\2\u8b00"+
-    "\2\u8b54\2\u8ba8\2\u8bfc\2\u8c50\2\u8ca4\2\u8cf8\2\u8d4c\2\u8da0"+
-    "\2\u8df4\2\u8e48\2\u8e9c\2\u8ef0\2\u8f44\2\u8f98\2\u8fec\2\u9040"+
-    "\2\u9094\2\u6f1c\2\u90e8\2\u913c\2\u9190\2\u91e4\2\u9238\2\u928c"+
-    "\2\u92e0\2\u80d4\2\u9334\2\u9388\2\u93dc\2\u9430\2\u9484\2\u94d8"+
-    "\2\u952c\2\u9580\2\u95d4\2\u9628\2\u967c\2\u96d0\2\u9724\2\u9778"+
-    "\2\u97cc\2\u9820\2\u9874\2\u98c8\2\u991c\2\u9970\2\u99c4\2\u9a18"+
-    "\2\u9a6c\2\u9ac0\2\u9b14\0\ucdbc\2\u9b68\2\u9bbc\2\u9c10\2\u86bc"+
-    "\2\u9c64\2\u9cb8\2\u9d0c\2\u9d60\2\u9db4\2\u9e08\2\u9e5c\2\u9eb0"+
-    "\2\u9f04\2\u9f58\2\u9fac\2\ua000\2\ua054\0\ub574\0\ue658\2\ua0a8"+
-    "\2\ua0fc\2\ua150\2\ua1a4\2\ua1f8\2\ua24c\2\ua2a0\2\ua2f4\2\ua348"+
-    "\2\ua39c\2\ua3f0\2\ua444\0\ub670\2\ua498\2\ua4ec\2\ua540\2\ua594"+
-    "\2\ua5e8\2\ua63c\2\ua690\2\ua6e4\2\ua738\2\ua78c\2\ua7e0\2\ua834"+
-    "\2\ua888\2\ua8dc\2\ua930\2\ua984\2\ua9d8\2\uaa2c\2\uaa80\2\uaad4"+
-    "\2\uab28\2\uab7c\2\uabd0\2\uac24\2\uac78\2\uaccc\2\uad20\2\uad74"+
-    "\2\uadc8\2\uae1c\2\uae70\2\uaec4\2\uaf18\2\uaf6c\2\uafc0\2\ub014"+
-    "\2\ub068\2\ub0bc\2\ub110\2\ub164\2\ub1b8\2\ub20c\2\ub260\2\ub2b4"+
-    "\2\ub308\2\ub35c\2\ub3b0\0\ud644\2\ub404\2\ub458\2\ub4ac\2\ub500"+
-    "\2\ub554\2\ub5a8\2\ub5fc\2\ub650\2\ub6a4\2\ub6f8\2\ub74c\2\ub7a0"+
-    "\2\ub7f4\2\ub848\2\ub89c\2\ub8f0\2\ub944\2\ub998\2\ub9ec\2\uba40"+
-    "\2\uba94\2\ubae8\2\ubb3c\2\ubb90\2\ubbe4\2\ubc38\2\ubc8c\2\ubce0"+
-    "\2\ubd34\2\ubd88\2\ubddc\2\ube30\2\ube84\2\ubed8\2\ubf2c\2\ubf80"+
-    "\2\ubfd4\2\uc028\2\uc07c\2\uc0d0\2\uc124\2\uc178\2\uc1cc\2\uc220"+
-    "\2\uc274\2\uc2c8\2\uc31c\2\uc370\0\ud698\2\uc3c4\2\uc418\2\uc46c"+
-    "\2\uc4c0\2\uc514\2\u5b6c\2\uc568\2\uc5bc\2\uc610\2\uc664\2\uc6b8"+
-    "\2\uc70c\2\uc760\2\uc7b4\2\uc808\2\uc85c\2\uc8b0\2\uc904\2\uc958"+
-    "\2\uc9ac\2\uca00\2\uca54\2\ucaa8\2\ucafc\2\ucb50\2\ucba4\2\ucbf8"+
-    "\2\ucc4c\2\ucca0\2\uccf4\2\ucd48\2\ucd9c\2\ucdf0\2\uce44\2\uce98"+
-    "\2\uceec\2\ucf40\2\ucf94\2\ucfe8\2\ud03c\2\ub944\2\ud090\2\ud0e4"+
-    "\2\ud138\2\ud18c\2\ud1e0\2\ud234\2\ua738\2\ud288\2\ud2dc\2\ud330"+
-    "\2\ud384\2\ud3d8\2\ud42c\2\ud480\2\ud4d4\2\ud528\2\ud57c\2\ud5d0"+
-    "\2\ud624\2\ud678\2\ud6cc\2\ud720\2\ud774\2\ud7c8\2\ud81c\2\ud870"+
-    "\2\ud8c4\2\ud918\2\ud96c\2\ud9c0\2\uda14\2\uda68\2\udabc\2\udb10"+
-    "\2\udb64\2\udbb8\0\uc8d0\2\udc0c\2\udc60\2\udcb4\2\udd08\2\udd5c"+
-    "\2\uddb0\2\ude04\2\ude58\2\udeac\2\udf00\2\udf54\2\udfa8\2\udffc"+
-    "\2\ue050\2\ue0a4\2\ue0f8\2\ue14c\2\ue1a0\2\ue1f4\2\ue248\2\ue29c"+
-    "\2\ue2f0\2\ue344\2\ue398\2\ue3ec\2\ue440\2\ue494\2\ue4e8\2\ue53c"+
-    "\2\ue590\2\ue5e4\2\ue638\2\ue68c\2\ue6e0\2\ue734\2\ue788\2\ue7dc"+
-    "\2\ue830\2\ue884\2\ue8d8\2\ue92c\2\ue980\2\ue9d4\2\uea28\2\uea7c"+
-    "\2\uead0\2\ueb24\2\ueb78\2\uebcc\2\uec20\2\uec74\2\uecc8\2\ued1c"+
-    "\2\ued70\2\uedc4\2\uee18\2\uee6c\2\ueec0\2\uef14\2\uef68\2\uefbc"+
-    "\2\uf010\2\uf064\2\uf0b8\2\uf10c\2\uf160\2\uf1b4\2\uf208\2\uf25c"+
-    "\2\uf2b0\2\uf304\2\uf358\2\uf3ac\2\uf400\2\uf454\2\uf4a8\2\uf4fc"+
-    "\2\uf550\2\uf5a4\2\uf5f8\2\uf64c\2\uf6a0\2\uf6f4\2\uf748\2\uf79c"+
-    "\2\uf7f0\2\uf844\2\uf898\2\uf8ec\2\uf940\2\uf994\2\uf9e8\2\ufa3c"+
-    "\2\ufa90\2\ufae4\2\ufb38\2\ufb8c\2\ufbe0\2\ufc34\2\ufc88\2\ufcdc"+
-    "\2\ufd30\2\ufd84\2\ufdd8\2\ufe2c\2\ufe80\2\ufed4\2\uff28\2\uff7c"+
-    "\2\uffd0\3\44\3\170\3\314\3\u0120\3\u0174\3\u01c8\3\u021c"+
-    "\3\u0270\3\u02c4\3\u0318\3\u036c\3\u03c0\3\u0414\3\u0468\3\u04bc"+
-    "\3\u0510\3\u0564\3\u05b8\3\u060c\3\u0660\3\u06b4\3\u0708\3\u075c"+
-    "\3\u07b0\3\u0804\3\u0858\3\u08ac\3\u0900\3\u0954\3\u09a8\3\u09fc"+
-    "\3\u0a50\3\u0aa4\3\u0af8\3\u0b4c\3\u0ba0\3\u0bf4\3\u0c48\3\u0c9c"+
-    "\3\u0cf0\3\u0d44\3\u0d98\3\u0dec\3\u0e40\3\u0e94\3\u0ee8\3\u0f3c"+
-    "\3\u0f90\3\u0fe4\3\u1038\3\u108c\3\u10e0\3\u1134\3\u1188\3\u11dc"+
-    "\3\u1230\3\u1284\3\u12d8\3\u132c\3\u1380\3\u13d4\3\u1428\3\u147c"+
-    "\3\u14d0\3\u1524\3\u1578\3\u15cc\3\u1620\3\u1674\3\u16c8\3\u171c"+
-    "\3\u1770\3\u17c4\3\u1818\3\u186c\3\u18c0\3\u1914\3\u1968\3\u19bc"+
-    "\3\u1a10\3\u1a64\3\u1ab8\3\u1b0c\3\u1b60\3\u1bb4\3\u1c08\3\u1c5c"+
-    "\3\u1cb0\3\u1d04\3\u1d58\3\u1dac\3\u1e00\3\u1e54\3\u1ea8\3\u1efc"+
-    "\3\u1f50\3\u1fa4\3\u1ff8\3\u204c\3\u20a0\3\u20f4\3\u2148\3\u219c"+
-    "\3\u21f0\3\u2244\3\u2298\3\u22ec\3\u2340\3\u2394\3\u23e8\3\u243c"+
-    "\3\u2490\3\u24e4\3\u2538\3\u258c\3\u25e0\3\u2634\3\u2688\3\u26dc"+
-    "\3\u2730\3\u2784\3\u27d8\3\u282c\3\u2880\3\u28d4\3\u2928\3\u297c"+
-    "\3\u29d0\3\u2a24\3\u2a78\3\u2acc\3\u2b20\3\u2b74\3\u2bc8\3\u2c1c"+
-    "\3\u2c70\3\u2cc4\3\u2d18\3\u2d6c\3\u2dc0\3\u2e14\3\u2e68\3\u2ebc"+
-    "\3\u2f10\3\u2f64\3\u2fb8\3\u300c\3\u3060\3\u30b4\3\u3108\3\u315c"+
-    "\3\u31b0\3\u3204\3\u3258\3\u32ac\3\u3300\3\u3354\3\u33a8\3\u33fc"+
-    "\3\u3450\3\u34a4\3\u34f8\3\u354c\3\u35a0\3\u35f4\3\u3648\3\u369c"+
-    "\3\u36f0\3\u3744\3\u3798\3\u37ec\3\u3840\3\u3894\3\u38e8\3\u393c"+
-    "\3\u3990\3\u39e4\3\u3a38\3\u3a8c\3\u3ae0\3\u3b34\3\u3b88\3\u3bdc"+
-    "\3\u3c30\3\u3c84\3\u3cd8\3\u3d2c\3\u3d80\3\u3dd4\3\u3e28\3\u3e7c"+
-    "\3\u3ed0\3\u3f24\3\u3f78\3\u3fcc\3\u4020\3\u4074\3\u40c8\3\u411c"+
-    "\3\u4170\3\u41c4\3\u4218\3\u426c\3\u42c0\3\u4314\3\u4368\3\u43bc"+
-    "\3\u4410\3\u4464\3\u44b8\3\u450c\3\u4560\3\u45b4\3\u4608\3\u465c"+
-    "\3\u46b0\3\u4704\3\u4758\3\u47ac\3\u4800\3\u4854\3\u48a8\3\u48fc"+
-    "\3\u4950\3\u49a4\3\u49f8\3\u4a4c\3\u4aa0\3\u4af4\3\u4b48\3\u4b9c"+
-    "\3\u4bf0\3\u4c44\3\u4c98\3\u4cec\3\u4d40\3\u4d94\3\u4de8\3\u4e3c"+
-    "\3\u4e90\3\u4ee4\3\u4f38\3\u4f8c\3\u4fe0\3\u5034\3\u5088\3\u50dc"+
-    "\3\u5130\3\u5184\3\u51d8\3\u522c\3\u5280\3\u52d4\3\u5328\3\u537c"+
-    "\3\u53d0\3\u5424\3\u5478\3\u54cc\3\u5520\3\u5574\3\u55c8\3\u561c"+
-    "\3\u5670\3\u56c4\3\u5718\3\u576c\3\u57c0\3\u5814\3\u5868\3\u58bc"+
-    "\3\u5910\3\u5964\3\u59b8\3\u5a0c\3\u5a60\3\u5ab4\3\u5b08\3\u5b5c"+
-    "\3\u5bb0\3\u5c04\3\u5c58\3\u5cac\3\u5d00\3\u5d54\3\u5da8\3\u5dfc"+
-    "\3\u5e50\3\u5ea4\3\u5ef8\3\u5f4c\3\u5fa0\3\u5ff4\3\u6048\3\u609c"+
-    "\3\u60f0\3\u6144\3\u6198\3\u61ec\3\u6240\3\u6294\3\u62e8\3\u633c"+
-    "\3\u6390\3\u63e4\3\u6438\3\u648c\3\u64e0\3\u6534\3\u6588\3\u65dc"+
-    "\3\u6630\3\u6684\3\u66d8\3\u672c\3\u6780\3\u67d4\3\u6828\3\u687c"+
-    "\3\u68d0\3\u6924\3\u6978\3\u69cc\3\u6a20\3\u6a74\3\u6ac8\3\u6b1c"+
-    "\3\u6b70\3\u6bc4\3\u6c18\3\u6c6c\3\u6cc0\3\u6d14\3\u6d68\3\u6dbc"+
-    "\3\u6e10\3\u6e64\3\u6eb8\3\u6f0c\3\u6f60\3\u6fb4\3\u7008\3\u705c"+
-    "\3\u70b0\3\u7104\3\u7158\3\u71ac\3\u7200\3\u7254\3\u72a8\3\u72fc"+
-    "\3\u7350\3\u73a4\3\u73f8\3\u744c\3\u74a0\3\u74f4\3\u7548\3\u759c"+
-    "\3\u75f0\3\u7644\3\u7698\3\u76ec\3\u7740\3\u7794\3\u77e8\3\u783c"+
-    "\3\u7890\3\u78e4\3\u7938\3\u798c\3\u79e0\3\u7a34\3\u7a88\3\u7adc"+
-    "\3\u7b30\3\u7b84\3\u7bd8\3\u7c2c\3\u7c80\3\u7cd4\3\u7d28\3\u7d7c"+
-    "\3\u7dd0\3\u7e24\3\u7e78\3\u7ecc\3\u7f20\3\u7f74\3\u7fc8\3\u801c"+
-    "\3\u8070\3\u80c4\3\u8118\3\u816c\3\u81c0\3\u8214\3\u8268\3\u82bc"+
-    "\3\u8310\3\u8364\3\u83b8\3\u840c\3\u8460\3\u84b4\3\u8508\3\u855c"+
-    "\3\u85b0\3\u8604\3\u8658\3\u86ac\3\u8700\3\u8754\3\u87a8\3\u87fc"+
-    "\3\u8850\3\u88a4\3\u88f8\3\u894c\3\u89a0\3\u89f4\3\u8a48\3\u8a9c"+
-    "\3\u8af0\3\u8b44\3\u8b98\3\u8bec\3\u8c40\3\u8c94\3\u8ce8\3\u8d3c"+
-    "\3\u8d90\3\u8de4\3\u8e38\3\u8e8c\3\u8ee0\3\u8f34\3\u8f88\3\u8fdc"+
-    "\3\u9030\3\u9084\3\u90d8\3\u912c\3\u9180\3\u91d4\3\u9228\3\u927c"+
-    "\3\u92d0\3\u9324\3\u9378\3\u93cc\3\u9420\3\u9474\3\u94c8\3\u951c"+
-    "\3\u9570\3\u95c4\3\u9618\3\u966c\3\u96c0\3\u9714\3\u9768\3\u97bc"+
-    "\3\u9810\3\u9864\3\u98b8\3\u990c\3\u9960\3\u99b4\3\u9a08\3\u9a5c"+
-    "\3\u9ab0\3\u9b04\3\u9b58\3\u9bac\3\u9c00\3\u9c54\3\u9ca8\3\u9cfc"+
-    "\3\u9d50\3\u9da4\3\u9df8\3\u9e4c\3\u9ea0\3\u9ef4\3\u9f48\3\u9f9c"+
-    "\3\u9ff0\3\ua044\3\ua098\3\ua0ec\3\ua140\3\ua194\3\ua1e8\3\ua23c"+
-    "\3\ua290\3\ua2e4\3\ua338\3\ua38c\3\ua3e0\3\ua434\3\ua488\3\ua4dc"+
-    "\3\ua530\3\ua584\3\ua5d8\3\ua62c\3\ua680\3\ua6d4\3\ua728\3\ua77c"+
-    "\3\ua7d0\3\ua824\3\ua878\3\ua8cc\3\ua920\3\ua974\3\ua9c8\3\uaa1c"+
-    "\3\uaa70\3\uaac4\3\uab18\3\uab6c\3\uabc0\3\uac14\3\uac68\3\uacbc"+
-    "\3\uad10\3\uad64\3\uadb8\3\uae0c\3\uae60\3\uaeb4\3\uaf08\3\uaf5c"+
-    "\3\uafb0\3\ub004\3\ub058\3\ub0ac\3\ub100\3\ub154\3\ub1a8\3\ub1fc"+
-    "\3\ub250\3\ub2a4\3\ub2f8\3\ub34c\3\ub3a0\3\ub3f4\3\ub448\3\ub49c"+
-    "\3\ub4f0\3\ub544\3\ub598\3\ub5ec\3\ub640\3\ub694\3\ub6e8\3\ub73c"+
-    "\3\ub790\3\ub7e4\3\ub838\3\ub88c\3\ub8e0\3\ub934\3\ub988\3\ub9dc"+
-    "\3\uba30\3\uba84\3\ubad8\3\ubb2c\3\ubb80\3\ubbd4\3\ubc28\3\ubc7c"+
-    "\3\ubcd0\3\ubd24\3\ubd78\3\ubdcc\3\ube20\3\ube74\3\ubec8\3\ubf1c"+
-    "\3\ubf70\3\ubfc4\3\uc018\3\uc06c\3\uc0c0\3\uc114\3\uc168\3\uc1bc"+
-    "\3\uc210\3\uc264\3\uc2b8\3\uc30c\3\uc360\3\uc3b4\3\uc408\3\uc45c"+
-    "\3\uc4b0\3\uc504\3\uc558\3\uc5ac\3\uc600\3\uc654\3\uc6a8\3\uc6fc"+
-    "\3\uc750\3\uc7a4\3\uc7f8\3\uc84c\3\uc8a0\3\uc8f4\3\uc948\3\uc99c"+
-    "\3\uc9f0\3\uca44\3\uca98\3\ucaec\3\ucb40\3\ucb94\3\ucbe8\3\ucc3c"+
-    "\3\ucc90\3\ucce4\3\ucd38\3\ucd8c\3\ucde0\3\uce34\3\uce88\3\ucedc"+
-    "\3\ucf30\3\ucf84\3\ucfd8\3\ud02c\3\ud080\3\ud0d4\3\ud128\3\ud17c"+
-    "\3\ud1d0\3\ud224\3\ud278\3\ud2cc\3\ud320\3\ud374\3\ud3c8\3\ud41c"+
-    "\3\ud470\3\ud4c4\3\ud518\3\ud56c\3\ud5c0\3\ud614\3\ud668\3\ud6bc"+
-    "\3\ud710\3\ud764\3\ud7b8\3\ud80c\3\ud860\3\ud8b4\3\ud908\3\ud95c"+
-    "\3\ud9b0\3\uda04\3\uda58\3\udaac\3\udb00\3\udb54\3\udba8\3\udbfc"+
-    "\3\udc50\3\udca4\3\udcf8\3\udd4c\3\udda0\3\uddf4\3\ude48\3\ude9c"+
-    "\3\udef0\3\udf44\3\udf98\3\udfec\3\ue040\3\ue094\3\ue0e8\3\ue13c"+
-    "\3\ue190\3\ue1e4\3\ue238\3\ue28c\3\ue2e0\3\ue334\3\ue388\3\ue3dc"+
-    "\3\ue430\3\ue484\3\ue4d8\3\ue52c\3\ue580\3\ue5d4\3\ue628\3\ue67c"+
-    "\3\ue6d0\3\ue724\3\ue778\3\ue7cc\3\ue820\3\ue874\3\ue8c8\3\ue91c"+
-    "\3\ue970\3\ue9c4\3\uea18\3\uea6c\3\ueac0\3\ueb14\3\ueb68\3\uebbc"+
-    "\3\uec10\3\uec64\3\uecb8\3\ued0c\3\ued60\3\uedb4\3\uee08\3\uee5c"+
-    "\3\ueeb0\3\uef04\3\uef58\3\uefac\3\uf000\3\uf054\3\uf0a8\3\uf0fc"+
-    "\3\uf150\3\uf1a4\3\uf1f8\3\uf24c\3\uf2a0\3\uf2f4\3\uf348\3\uf39c"+
-    "\3\uf3f0\3\uf444\3\uf498\3\uf4ec\3\uf540\3\uf594\3\uf5e8\3\uf63c"+
-    "\3\uf690\3\uf6e4\3\uf738\3\uf78c\3\uf7e0\3\uf834\3\uf888\3\uf8dc"+
-    "\3\uf930\3\uf984\3\uf9d8\3\ufa2c\3\ufa80\3\ufad4\3\ufb28\3\ufb7c"+
-    "\3\ufbd0\3\ufc24\3\ufc78\3\ufccc\3\ufd20\3\ufd74\3\ufdc8\3\ufe1c"+
-    "\3\ufe70\3\ufec4\3\uff18\3\uff6c\3\uffc0\4\24\4\150\4\274"+
-    "\4\u0110\4\u0164\4\u01b8\4\u020c\4\u0260\4\u02b4\4\u0308\4\u035c"+
-    "\4\u03b0\4\u0404\4\u0458\4\u04ac\4\u0500\4\u0554\4\u05a8\4\u05fc"+
-    "\4\u0650\4\u06a4\4\u06f8\4\u074c\4\u07a0\4\u07f4\4\u0848\4\u089c"+
-    "\4\u08f0\4\u0944\4\u0998\4\u09ec\4\u0a40\4\u0a94\4\u0ae8\4\u0b3c"+
-    "\4\u0b90\4\u0be4\4\u0c38\4\u0c8c\4\u0ce0\4\u0d34\4\u0d88\4\u0ddc"+
-    "\4\u0e30\4\u0e84\4\u0ed8\4\u0f2c\4\u0f80\4\u0fd4\4\u1028\4\u107c"+
-    "\4\u10d0\4\u1124\4\u1178\4\u11cc\4\u1220\4\u1274\4\u12c8\4\u131c"+
-    "\4\u1370\4\u13c4\4\u1418\4\u146c\4\u14c0\4\u1514\4\u1568\4\u15bc"+
-    "\4\u1610\4\u1664\4\u16b8\4\u170c\4\u1760\4\u17b4\4\u1808\4\u185c"+
-    "\4\u18b0\4\u1904\4\u1958\4\u19ac\4\u1a00\4\u1a54\4\u1aa8\4\u1afc"+
-    "\4\u1b50\4\u1ba4\4\u1bf8\4\u1c4c\4\u1ca0\4\u1cf4\4\u1d48\4\u1d9c"+
-    "\4\u1df0\4\u1e44\4\u1e98\4\u1eec\4\u1f40\4\u1f94\4\u1fe8\4\u203c"+
-    "\4\u2090\4\u20e4\4\u2138\4\u218c\4\u21e0\4\u2234\4\u2288\4\u22dc"+
-    "\4\u2330\4\u2384\4\u23d8\4\u242c\4\u2480\4\u24d4\4\u2528\4\u257c"+
-    "\4\u25d0\4\u2624\4\u2678\4\u26cc\4\u2720\4\u2774\4\u27c8\4\u281c"+
-    "\4\u2870\4\u28c4\4\u2918\4\u296c\4\u29c0\4\u2a14\4\u2a68\4\u2abc"+
-    "\4\u2b10\4\u2b64\4\u2bb8\4\u2c0c\4\u2c60\4\u2cb4\4\u2d08\4\u2d5c"+
-    "\4\u2db0\4\u2e04\4\u2e58\4\u2eac\4\u2f00\4\u2f54\4\u2fa8\4\u2ffc"+
-    "\4\u3050\4\u30a4\4\u30f8\4\u314c\4\u31a0\4\u31f4\4\u3248\4\u329c"+
-    "\4\u32f0\4\u3344\4\u3398\4\u33ec\4\u3440\4\u3494\4\u34e8\4\u353c"+
-    "\4\u3590\4\u35e4\4\u3638\4\u368c\4\u36e0\4\u3734\4\u3788\4\u37dc"+
-    "\4\u3830\4\u3884\4\u38d8\4\u392c\4\u3980\4\u39d4\4\u3a28\4\u3a7c"+
-    "\4\u3ad0\4\u3b24\4\u3b78\4\u3bcc\4\u3c20\4\u3c74\4\u3cc8\4\u3d1c"+
-    "\4\u3d70\4\u3dc4\4\u3e18\4\u3e6c\4\u3ec0\4\u3f14\4\u3f68\4\u3fbc"+
-    "\4\u4010\4\u4064\4\u40b8\4\u410c\4\u4160\4\u41b4\4\u4208\4\u425c"+
-    "\4\u42b0\4\u4304\4\u4358\4\u43ac\4\u4400\4\u4454\4\u44a8\4\u44fc"+
-    "\4\u4550\4\u45a4\4\u45f8\4\u464c\4\u46a0\4\u46f4\4\u4748\4\u479c"+
-    "\4\u47f0\4\u4844\4\u4898\4\u48ec\4\u4940\4\u4994\4\u49e8\4\u4a3c"+
-    "\4\u4a90\4\u4ae4\4\u4b38\4\u4b8c\4\u4be0\4\u4c34\4\u4c88\4\u4cdc"+
-    "\4\u4d30\4\u4d84\4\u4dd8\4\u4e2c\4\u4e80\4\u4ed4\4\u4f28\4\u4f7c"+
-    "\4\u4fd0\4\u5024\4\u5078\4\u50cc\4\u5120\4\u5174\4\u51c8\4\u521c"+
-    "\4\u5270\4\u52c4\4\u5318\4\u536c\4\u53c0\4\u5414\4\u5468\4\u54bc"+
-    "\4\u5510\4\u5564\4\u55b8\4\u560c\4\u5660\4\u56b4\4\u5708\4\u575c"+
-    "\4\u57b0\4\u5804\4\u5858\4\u58ac\4\u5900\4\u5954\4\u59a8\4\u59fc"+
-    "\4\u5a50\4\u5aa4\4\u5af8\4\u5b4c\4\u5ba0\4\u5bf4\4\u5c48\4\u5c9c"+
-    "\4\u5cf0\4\u5d44\4\u5d98\4\u5dec\4\u5e40\4\u5e94\4\u5ee8\4\u5f3c"+
-    "\4\u5f90\4\u5fe4\4\u6038\4\u608c\4\u60e0\4\u6134\4\u6188\4\u61dc"+
-    "\4\u6230\4\u6284\4\u62d8\4\u632c\4\u6380\4\u63d4\4\u6428\4\u647c"+
-    "\4\u64d0\4\u6524\4\u6578\4\u65cc\4\u6620\4\u6674\4\u66c8\4\u671c"+
-    "\4\u6770\4\u67c4\4\u6818\4\u686c\4\u68c0\4\u6914\4\u6968\4\u69bc"+
-    "\4\u6a10\4\u6a64\4\u6ab8\4\u6b0c\4\u6b60\4\u6bb4\4\u6c08\4\u6c5c"+
-    "\4\u6cb0\4\u6d04\4\u6d58\4\u6dac\4\u6e00\4\u6e54\4\u6ea8\4\u6efc"+
-    "\4\u6f50\4\u6fa4\4\u6ff8\4\u704c\4\u70a0\4\u70f4\4\u7148\4\u719c"+
-    "\4\u71f0\4\u7244\4\u7298\4\u72ec\0\u039c\0\u0ad4\0\u0c78\0\u19ec"+
-    "\0\u1a40\0\u1a94\0\u1ae8\0\u1b3c\0\u1b90\0\u1be4\0\u1c38\0\u1c8c"+
-    "\0\u1ce0\0\u1d34\0\u1d88\0\u1ddc\0\u1e30\0\u1e84\0\u1ed8\0\u1f2c"+
-    "\0\u1f80\0\u1fd4\0\u2028\0\u207c\0\u20d0\0\u2124\0\u2178\0\u21cc"+
-    "\0\u2220\4\u7340\4\u7394\4\u73e8\4\u743c\4\u7490\4\u74e4\4\u7538"+
-    "\4\u758c\4\u75e0\4\u7634\4\u7688\1\ud880\4\u76dc\4\u7730\4\u7784"+
-    "\4\u77d8\4\u782c\4\u7880\4\u78d4\4\u7928\4\u797c\4\u79d0\4\u7a24"+
-    "\1\uccb0\4\u7a78\4\u7acc\4\u7b20\4\u7b74\4\u7bc8\4\u7c1c\4\u7c70"+
-    "\4\u7cc4\4\u7d18\4\u7d6c\4\u7dc0\4\u7e14\4\u7e68\4\u7ebc\4\u7f10"+
-    "\4\u7f64\4\u7fb8\4\u800c\1\uaed4\4\u8060\2\u298c\1\ucdac\4\u80b4"+
-    "\4\u8108\4\u815c\4\u81b0\2\u2b30\4\u8204\4\u8258\4\u82ac\4\u8300"+
-    "\4\u8354\4\u83a8\4\u83fc\4\u8450\4\u84a4\4\u84f8\1\ua4fc\4\u854c"+
-    "\4\u85a0\4\u85f4\4\u8648\4\u869c\4\u86f0\4\u8744\4\u8798\4\u87ec"+
-    "\4\u8840\4\u8894\4\u88e8\4\u893c\4\u8990\4\u89e4\4\u8a38\4\u8a8c"+
-    "\4\u8ae0\4\u8b34\4\u8b88\4\u8bdc\4\u8c30\1\ue300\4\u8c84\4\u8cd8"+
-    "\4\u8d2c\4\u8d80\4\u8dd4\4\u8e28\4\u8e7c\4\u8ed0\4\u8f24\4\u8f78"+
-    "\4\u8fcc\4\u9020\4\u9074\4\u90c8\2\64\4\u911c\4\u9170\4\u91c4"+
-    "\4\u9218\4\u926c\4\u92c0\4\u9314\4\u9368\4\u93bc\4\u9410\4\u9464"+
-    "\4\u94b8\4\u950c\4\u9560\4\u95b4\1\ufcec\4\u9608\4\u965c\4\u96b0"+
-    "\4\u9704\4\u9758\4\u97ac\4\u9800\4\u9854\4\u98a8\4\u98fc\4\u9950"+
-    "\4\u99a4\4\u99f8\4\u9a4c\4\u9aa0\4\u9af4\4\u9b48\4\u9b9c\4\u9bf0"+
-    "\4\u9c44\4\u9c98\4\u9cec\4\u9d40\4\u9d94\4\u9de8\4\u9e3c\4\u9e90"+
-    "\1\uc674\2\u08bc\4\u9ee4\4\u9f38\4\u9f8c\4\u9fe0\4\ua034\0\u8f64"+
-    "\4\ua088\2\u07c0\4\ua0dc\4\ua130\4\ua184\4\ua1d8\1\ue108\4\ua22c"+
-    "\4\ua280\4\ua2d4\4\ua328\4\ua37c\4\ua3d0\4\ua424\4\ua478\4\ua4cc"+
-    "\4\ua520\0\u5010\4\ua574\4\ua5c8\4\ua61c\4\ua670\4\ua6c4\4\ua718"+
-    "\2\u2794\4\ua76c\4\ua7c0\4\ua814\4\ua868\1\uc4d0\4\ua8bc\4\ua910"+
-    "\4\ua964\4\ua9b8\4\uaa0c\4\uaa60\4\uaab4\4\uab08\4\uab5c\4\uabb0"+
-    "\4\uac04\4\uac58\4\uacac\4\uad00\4\uad54\4\uada8\4\uadfc\4\uae50"+
-    "\4\uaea4\4\uaef8\4\uaf4c\4\uafa0\4\uaff4\4\ub048\4\ub09c\4\ub0f0"+
-    "\4\ub144\1\ubb4c\4\ub198\4\ub1ec\4\ub240\1\uf218\1\ub1c8\4\ub294"+
-    "\4\ub2e8\4\ub33c\4\ub390\4\ub3e4\4\ub438\4\ub48c\4\ub4e0\4\ub534"+
-    "\4\ub588\4\ub5dc\4\ub630\4\ub684\4\ub6d8\4\ub72c\4\ub780\4\ub7d4"+
-    "\4\ub828\4\ub87c\4\ub8d0\4\ub924\4\ub978\1\ue6f0\4\ub9cc\4\uba20"+
-    "\4\uba74\4\ubac8\4\ubb1c\4\ubb70\4\ubbc4\4\ubc18\4\ubc6c\4\ubcc0"+
-    "\4\ubd14\4\ubd68\4\ubdbc\4\ube10\4\ube64\4\ubeb8\4\ubf0c\4\ubf60"+
-    "\4\ubfb4\4\uc008\4\uc05c\4\uc0b0\4\uc104\4\uc158\4\uc1ac\4\uc200"+
-    "\4\uc254\4\uc2a8\4\uc2fc\4\uc350\4\uc3a4\4\uc3f8\4\uc44c\4\uc4a0"+
-    "\4\uc4f4\4\uc548\4\uc59c\4\uc5f0\4\uc644\4\uc698\4\uc6ec\4\uc740"+
-    "\4\uc794\4\uc7e8\4\uc83c\4\uc890\4\uc8e4\4\uc938\4\uc98c\4\uc9e0"+
-    "\4\uca34\4\uca88\1\ue258\4\ucadc\4\ucb30\4\ucb84\4\ucbd8\4\ucc2c"+
-    "\4\ucc80\4\uccd4\4\ucd28\1\ucd04\4\ucd7c\4\ucdd0\4\uce24\4\uce78"+
-    "\4\ucecc\4\ucf20\4\uaf4c\4\ucf74\4\ucfc8\4\ud01c\0\u7914\4\ud070"+
-    "\4\ud0c4\4\ud118\4\ud16c\4\ud1c0\4\ud214\4\ud268\4\ud2bc\4\ud310"+
-    "\4\ud364\4\ud3b8\4\ud40c\4\ud460\4\ud4b4\4\ud508\4\ud55c\4\ud5b0"+
-    "\1\uafd0\4\ud604\4\ud658\4\ud6ac\4\ud700\4\ud754\4\ud7a8\4\ud7fc"+
-    "\4\ud850\4\ud8a4\4\ud8f8\4\ud94c\4\ud9a0\4\ud9f4\4\uda48\4\uda9c"+
-    "\4\udaf0\4\udb44\4\udb98\4\udbec\4\udc40\4\udc94\4\udce8\4\udd3c"+
-    "\4\udd90\4\udde4\4\ude38\4\ude8c\1\ufd40\4\udee0\4\udf34\4\udf88"+
-    "\4\udfdc\4\u8d80\4\ue030\4\ue084\4\ue0d8\4\ue12c\4\uab08\4\ue180"+
-    "\4\ue1d4\4\ue228\4\ue27c\4\ue2d0\1\ub414\4\ue324\4\ue378\4\ue3cc"+
-    "\4\ue420\4\ue474\4\ue4c8\4\ue51c\4\ue570\4\ue5c4\4\ue618\2\u1c6c"+
-    "\4\ue66c\4\ue6c0\4\ub588\4\ue714\4\ue768\4\ue7bc\4\ue810\4\ue864"+
-    "\4\ue8b8\4\ue90c\4\ue960\4\ue9b4\4\uea08\2\u1294\4\uea5c\4\ueab0"+
-    "\4\ueb04\4\ueb58\4\uebac\4\uec00\4\uec54\4\ueca8\0\u2514\0\u2568"+
-    "\0\u25bc\0\u2610\0\u2664\0\u26b8\0\u270c\0\u2760\0\u27b4\0\u2808"+
-    "\0\u285c\0\u28b0\0\u2904\0\u2958\0\u29ac\0\u2a00\0\u2a54\0\u2aa8"+
-    "\0\u2afc\0\u2b50\0\u2ba4\0\u2bf8\0\u2c4c\0\u2ca0\0\u2cf4\0\u2d48"+
-    "\4\uecfc\2\u7600\4\ued50\4\ueda4\4\uedf8\4\uee4c\4\ueea0\4\ueef4"+
-    "\4\uef48\4\uef9c\4\ueff0\4\uf044\4\uf098\2\u6a30\4\uf0ec\4\uf140"+
-    "\4\uf194\4\uf1e8\4\uf23c\4\uf290\4\uf2e4\4\uf338\4\uf38c\4\uf3e0"+
-    "\4\uf434\4\uf488\4\uf4dc\4\uf530\4\uf584\4\uf5d8\4\uf62c\4\uf680"+
-    "\2\u4c54\4\uf6d4\2\uc70c\2\u6b2c\4\uf728\4\uf77c\4\uf7d0\4\uf824"+
-    "\2\uc8b0\4\uf878\4\uf8cc\4\uf920\4\uf974\4\uf9c8\4\ufa1c\4\ufa70"+
-    "\4\ufac4\4\ufb18\4\ufb6c\2\u4378\4\ufbc0\4\ufc14\4\ufc68\4\ufcbc"+
-    "\4\ufd10\4\ufd64\4\ufdb8\4\ufe0c\4\ufe60\4\ufeb4\4\uff08\4\uff5c"+
-    "\4\uffb0\5\4\5\130\5\254\5\u0100\5\u0154\5\u01a8\5\u01fc"+
-    "\5\u0250\5\u02a4\2\u8080\5\u02f8\5\u034c\5\u03a0\5\u03f4\5\u0448"+
-    "\5\u049c\5\u04f0\5\u0544\5\u0598\5\u05ec\5\u0640\5\u0694\5\u06e8"+
-    "\5\u073c\2\u9db4\5\u0790\5\u07e4\5\u0838\5\u088c\5\u08e0\5\u0934"+
-    "\5\u0988\5\u09dc\5\u0a30\5\u0a84\5\u0ad8\5\u0b2c\5\u0b80\5\u0bd4"+
-    "\5\u0c28\2\u9a6c\5\u0c7c\5\u0cd0\5\u0d24\5\u0d78\5\u0dcc\5\u0e20"+
-    "\5\u0e74\5\u0ec8\5\u0f1c\5\u0f70\5\u0fc4\5\u1018\5\u106c\5\u10c0"+
-    "\5\u1114\5\u1168\5\u11bc\5\u1210\5\u1264\5\u12b8\5\u130c\5\u1360"+
-    "\5\u13b4\5\u1408\5\u145c\5\u14b0\5\u1504\2\u63f4\2\ua63c\5\u1558"+
-    "\5\u15ac\5\u1600\5\u1654\5\u16a8\0\ue3b8\5\u16fc\2\ua540\5\u1750"+
-    "\5\u17a4\5\u17f8\5\u184c\2\u7e88\5\u18a0\5\u18f4\5\u1948\5\u199c"+
-    "\5\u19f0\5\u1a44\5\u1a98\5\u1aec\5\u1b40\5\u1b94\0\ua464\5\u1be8"+
-    "\5\u1c3c\5\u1c90\5\u1ce4\5\u1d38\5\u1d8c\2\uc514\5\u1de0\5\u1e34"+
-    "\5\u1e88\5\u1edc\2\u6250\5\u1f30\5\u1f84\5\u1fd8\5\u202c\5\u2080"+
-    "\5\u20d4\5\u2128\5\u217c\5\u21d0\5\u2224\5\u2278\5\u22cc\5\u2320"+
-    "\5\u2374\5\u23c8\5\u241c\5\u2470\5\u24c4\5\u2518\5\u256c\5\u25c0"+
-    "\5\u2614\5\u2668\5\u26bc\5\u2710\5\u2764\5\u27b8\2\u58cc\5\u280c"+
-    "\5\u2860\5\u28b4\2\u8f98\2\u4f48\5\u2908\5\u295c\5\u29b0\5\u2a04"+
-    "\5\u2a58\5\u2aac\5\u2b00\5\u2b54\5\u2ba8\5\u2bfc\5\u2c50\5\u2ca4"+
-    "\5\u2cf8\5\u2d4c\5\u2da0\5\u2df4\5\u2e48\5\u2e9c\5\u2ef0\5\u2f44"+
-    "\5\u2f98\5\u2fec\2\u8470\5\u3040\5\u3094\5\u30e8\5\u313c\5\u3190"+
-    "\5\u31e4\5\u3238\5\u328c\5\u32e0\5\u3334\5\u3388\5\u33dc\5\u3430"+
-    "\5\u3484\5\u34d8\5\u352c\5\u3580\5\u35d4\5\u3628\5\u367c\5\u36d0"+
-    "\5\u3724\5\u3778\5\u37cc\5\u3820\5\u3874\5\u38c8\5\u391c\5\u3970"+
-    "\5\u39c4\5\u3a18\5\u3a6c\5\u3ac0\5\u3b14\5\u3b68\5\u3bbc\5\u3c10"+
-    "\5\u3c64\5\u3cb8\5\u3d0c\5\u3d60\5\u3db4\5\u3e08\5\u3e5c\5\u3eb0"+
-    "\5\u3f04\5\u3f58\5\u3fac\5\u4000\5\u4054\5\u40a8\5\u40fc\2\u7fd8"+
-    "\5\u4150\5\u41a4\5\u41f8\5\u424c\5\u42a0\5\u42f4\5\u4348\5\u439c"+
-    "\2\u6a84\5\u43f0\5\u4444\5\u4498\5\u44ec\5\u4540\5\u4594\5\u25c0"+
-    "\5\u45e8\5\u463c\5\u4690\0\ucd68\5\u46e4\5\u4738\5\u478c\5\u47e0"+
-    "\5\u4834\5\u4888\5\u48dc\5\u4930\5\u4984\5\u49d8\5\u4a2c\5\u4a80"+
-    "\5\u4ad4\5\u4b28\5\u4b7c\5\u4bd0\5\u4c24\2\u4d50\5\u4c78\5\u4ccc"+
-    "\5\u4d20\5\u4d74\5\u4dc8\5\u4e1c\5\u4e70\5\u4ec4\5\u4f18\5\u4f6c"+
-    "\5\u4fc0\5\u5014\5\u5068\5\u50bc\5\u5110\5\u5164\5\u51b8\5\u520c"+
-    "\5\u5260\5\u52b4\5\u5308\5\u535c\5\u53b0\5\u5404\5\u5458\5\u54ac"+
-    "\5\u5500\2\u9ac0\5\u5554\5\u55a8\5\u55fc\5\u5650\5\u03f4\5\u56a4"+
-    "\5\u56f8\5\u574c\5\u57a0\5\u217c\5\u57f4\5\u5848\5\u589c\5\u58f0"+
-    "\5\u5944\2\u5194\5\u5998\5\u59ec\5\u5a40\5\u5a94\5\u5ae8\5\u5b3c"+
-    "\5\u5b90\5\u5be4\5\u5c38\5\u5c8c\2\ub9ec\5\u5ce0\5\u2bfc\5\u5d34"+
-    "\5\u5d88\5\u5ddc\5\u5e30\5\u5e84\5\u5ed8\5\u5f2c\5\u5f80\5\u5fd4"+
-    "\5\u6028\2\ub014\5\u607c\5\u60d0\5\u6124\5\u6178\5\u61cc\5\u6220"+
-    "\5\u6274\5\u62c8\5\u631c\5\u6370\5\u63c4\5\u6418\5\u646c\5\u64c0"+
-    "\5\u6514\5\u6568\5\u65bc\5\u6610\5\u6664\5\u66b8\5\u670c\5\u6760"+
-    "\5\u67b4\5\u6808\5\u685c\5\u68b0\5\u6904\5\u6958\5\u69ac\5\u6a00"+
-    "\5\u6a54\5\u6aa8\5\u6afc\5\u6b50\5\u6ba4\5\u6bf8\5\u6c4c\5\u6ca0"+
-    "\5\u6cf4\5\u6d48\5\u6d9c\5\u6df0\5\u6e44\5\u6e98\5\u6eec\5\u6f40"+
-    "\5\u6f94\5\u6fe8\5\u703c\5\u7090\5\u70e4\5\u7138\5\u718c\5\u71e0"+
-    "\5\u7234\5\u7288\5\u72dc\5\u7330\5\u7384\5\u73d8\5\u742c\5\u7480"+
-    "\5\u74d4\5\u7528\5\u757c\5\u75d0\5\u7624\5\u7678\5\u76cc\5\u7720"+
-    "\5\u7774\5\u77c8\5\u781c\5\u7870\5\u78c4\5\u7918\5\u796c\5\u79c0"+
-    "\5\u7a14\5\u7a68\5\u7abc\5\u7b10\5\u7b64\5\u7bb8\5\u7c0c\5\u7c60"+
-    "\5\u7cb4\5\u7d08\5\u7d5c\5\u7db0\5\u7e04\5\u7e58\5\u7eac\5\u7f00"+
-    "\5\u7f54\5\u7fa8\5\u7ffc\5\u8050\5\u80a4\5\u80f8\5\u814c\5\u81a0"+
-    "\5\u81f4\5\u8248\5\u829c\5\u82f0\5\u8344\5\u8398\5\u83ec\5\u8440"+
-    "\5\u8494\5\u84e8\5\u853c\5\u8590\5\u85e4\5\u8638\5\u868c\5\u86e0"+
-    "\5\u8734\5\u8788\5\u87dc\5\u8830\5\u8884\5\u88d8\5\u892c\5\u8980"+
-    "\5\u89d4\5\u8a28\5\u8a7c\5\u8ad0\5\u8b24\5\u8b78\5\u8bcc\5\u8c20"+
-    "\5\u8c74\5\u8cc8\5\u8d1c\5\u8d70\5\u8dc4\5\u8e18\5\u8e6c\5\u8ec0"+
-    "\5\u8f14\5\u8f68\5\u8fbc\5\u9010\5\u9064\5\u90b8\5\u910c\5\u9160"+
-    "\5\u91b4\5\u9208\5\u925c\5\u92b0\5\u9304\5\u9358\5\u93ac\5\u9400"+
-    "\5\u9454\5\u94a8\5\u94fc\5\u9550\5\u95a4\5\u95f8\5\u964c\5\u96a0"+
-    "\5\u96f4\5\u9748\5\u979c\5\u97f0\5\u9844\5\u9898\5\u98ec\5\u9940"+
-    "\5\u9994\5\u99e8\5\u9a3c\5\u9a90\5\u9ae4\5\u9b38\5\u9b8c\5\u9be0"+
-    "\5\u9c34\5\u9c88\5\u9cdc\5\u9d30\5\u9d84\5\u9dd8\5\u9e2c\5\u9e80"+
-    "\5\u9ed4\5\u9f28\5\u9f7c\5\u9fd0\5\ua024\5\ua078\5\ua0cc\5\ua120"+
-    "\5\ua174\5\ua1c8\5\ua21c\5\ua270\5\ua2c4\5\ua318\5\ua36c\5\ua3c0"+
-    "\5\ua414\5\ua468\5\ua4bc\5\ua510\5\ua564\5\ua5b8\5\ua60c\5\ua660"+
-    "\5\ua6b4\5\ua708\5\ua75c\5\ua7b0\5\ua804\5\ua858\5\ua8ac\5\ua900"+
-    "\5\ua954\5\ua9a8\5\ua9fc\5\uaa50\5\uaaa4\5\uaaf8\5\uab4c\5\uaba0"+
-    "\5\uabf4\5\uac48\5\uac9c\5\uacf0\5\uad44\5\uad98\5\uadec\5\uae40"+
-    "\5\uae94\5\uaee8\5\uaf3c\5\uaf90\5\uafe4\5\ub038\5\ub08c\5\ub0e0"+
-    "\5\ub134\5\ub188\5\ub1dc\5\ub230\5\ub284\5\ub2d8\5\ub32c\5\ub380"+
-    "\5\ub3d4\5\ub428\5\ub47c\5\ub4d0\5\ub524\5\ub578\5\ub5cc\5\ub620"+
-    "\5\ub674\5\ub6c8\5\ub71c\5\ub770\5\ub7c4\5\ub818\5\ub86c\5\ub8c0"+
-    "\5\ub914\5\ub968\5\ub9bc\5\uba10\5\uba64\5\ubab8\5\ubb0c\5\ubb60"+
-    "\5\ubbb4\5\ubc08\5\ubc5c\5\ubcb0\5\ubd04\5\ubd58\5\ubdac\5\ube00"+
-    "\5\ube54\5\ubea8\5\ubefc\5\ubf50\5\ubfa4\5\ubff8\5\uc04c\5\uc0a0"+
-    "\5\uc0f4\5\uc148\5\uc19c\5\uc1f0\5\uc244\5\uc298\5\uc2ec\5\uc340"+
-    "\5\uc394\5\uc3e8\5\uc43c\5\uc490\5\uc4e4\5\uc538\5\uc58c\5\uc5e0"+
-    "\5\uc634\5\uc688\5\uc6dc\5\uc730\5\uc784\5\uc7d8\5\uc82c\5\uc880"+
-    "\5\uc8d4\5\uc928\5\uc97c\5\uc9d0\5\uca24\5\uca78\5\ucacc\5\ucb20"+
-    "\5\ucb74\5\ucbc8\5\ucc1c\5\ucc70\5\uccc4\5\ucd18\5\ucd6c\5\ucdc0"+
-    "\5\uce14\5\uce68\5\ucebc\5\ucf10\5\ucf64\5\ucfb8\5\ud00c\5\ud060"+
-    "\5\ud0b4\5\ud108\5\ud15c\5\ud1b0\5\ud204\5\ud258\5\ud2ac\5\ud300"+
-    "\5\ud354\5\ud3a8\5\ud3fc\5\ud450\5\ud4a4\5\ud4f8\5\ud54c\5\ud5a0"+
-    "\5\ud5f4\5\ud648\5\ud69c\5\ud6f0\5\ud744\5\ud798\5\ud7ec\5\ud840"+
-    "\5\ud894\5\ud8e8\5\ud93c\5\ud990\5\ud9e4\5\uda38\5\uda8c\5\udae0"+
-    "\5\udb34\5\udb88\5\udbdc\5\udc30\5\udc84\5\udcd8\5\udd2c\5\udd80"+
-    "\5\uddd4\5\ude28\5\ude7c\5\uded0\5\udf24\5\udf78\5\udfcc\5\ue020"+
-    "\5\ue074\5\ue0c8\5\ue11c\5\ue170\5\ue1c4\5\ue218\5\ue26c\5\ue2c0"+
-    "\5\ue314\5\ue368\5\ue3bc\5\ue410\5\ue464\5\ue4b8\5\ue50c\5\ue560"+
-    "\5\ue5b4\5\ue608\5\ue65c\5\ue6b0\5\ue704\5\ue758\5\ue7ac\5\ue800"+
-    "\5\ue854\5\ue8a8\5\ue8fc\5\ue950\5\ue9a4\5\ue9f8\5\uea4c\5\ueaa0"+
-    "\5\ueaf4\5\ueb48\5\ueb9c\5\uebf0\5\uec44\5\uec98\5\uecec\5\ued40"+
-    "\5\ued94\5\uede8\5\uee3c\5\uee90\5\ueee4\5\uef38\5\uef8c\5\uefe0"+
-    "\5\uf034\5\uf088\5\uf0dc\5\uf130\5\uf184\5\uf1d8\5\uf22c\5\uf280"+
-    "\5\uf2d4\5\uf328\5\uf37c\5\uf3d0\5\uf424\5\uf478\5\uf4cc\5\uf520"+
-    "\5\uf574\5\uf5c8\5\uf61c\5\uf670\5\uf6c4\5\uf718\5\uf76c\5\uf7c0"+
-    "\5\uf814\5\uf868\5\uf8bc\5\uf910\5\uf964\5\uf9b8\5\ufa0c\5\ufa60"+
-    "\5\ufab4\5\ufb08\5\ufb5c\5\ufbb0\5\ufc04\5\ufc58\5\ufcac\5\ufd00"+
-    "\5\ufd54\5\ufda8\5\ufdfc\5\ufe50\5\ufea4\5\ufef8\5\uff4c\5\uffa0"+
-    "\5\ufff4\6\110\6\234\6\360\6\u0144\6\u0198\6\u01ec\6\u0240"+
-    "\6\u0294\6\u02e8\6\u033c\6\u0390\6\u03e4\6\u0438\6\u048c\6\u04e0"+
-    "\6\u0534\6\u0588\6\u05dc\6\u0630\6\u0684\6\u06d8\6\u072c\6\u0780"+
-    "\6\u07d4\6\u0828\6\u087c\6\u08d0\6\u0924\6\u0978\6\u09cc\6\u0a20"+
-    "\6\u0a74\6\u0ac8\6\u0b1c\6\u0b70\6\u0bc4\6\u0c18\6\u0c6c\6\u0cc0"+
-    "\6\u0d14\6\u0d68\6\u0dbc\6\u0e10\6\u0e64\6\u0eb8\6\u0f0c\6\u0f60"+
-    "\6\u0fb4\6\u1008\6\u105c\6\u10b0\6\u1104\6\u1158\6\u11ac\6\u1200"+
-    "\6\u1254\6\u12a8\6\u12fc\6\u1350\6\u13a4\6\u13f8\6\u144c\6\u14a0"+
-    "\6\u14f4\6\u1548\6\u159c\6\u15f0\6\u1644\6\u1698\6\u16ec\6\u1740"+
-    "\6\u1794\6\u17e8\6\u183c\6\u1890\6\u18e4\6\u1938\6\u198c\6\u19e0"+
-    "\6\u1a34\6\u1a88\6\u1adc\6\u1b30\6\u1b84\6\u1bd8\6\u1c2c\6\u1c80"+
-    "\6\u1cd4\6\u1d28\6\u1d7c\6\u1dd0\6\u1e24\6\u1e78\6\u1ecc\6\u1f20"+
-    "\6\u1f74\6\u1fc8\6\u201c\6\u2070\6\u20c4\6\u2118\6\u216c\6\u21c0"+
-    "\6\u2214\6\u2268\6\u22bc\6\u2310\6\u2364\6\u23b8\6\u240c\6\u2460"+
-    "\6\u24b4\6\u2508\6\u255c\6\u25b0\6\u2604\6\u2658\6\u26ac\6\u2700"+
-    "\6\u2754\6\u27a8\6\u27fc\6\u2850\6\u28a4\6\u28f8\6\u294c\6\u29a0"+
-    "\6\u29f4\6\u2a48\6\u2a9c\6\u2af0\6\u2b44\6\u2b98\6\u2bec\6\u2c40"+
-    "\6\u2c94\6\u2ce8\6\u2d3c\6\u2d90\6\u2de4\6\u2e38\6\u2e8c\6\u2ee0"+
-    "\6\u2f34\6\u2f88\6\u2fdc\6\u3030\6\u3084\6\u30d8\6\u312c\6\u3180"+
-    "\6\u31d4\6\u3228\6\u327c\6\u32d0\6\u3324\6\u3378\6\u33cc\6\u3420"+
-    "\6\u3474\6\u34c8\6\u351c\6\u3570\6\u35c4\6\u3618\6\u366c\6\u36c0"+
-    "\6\u3714\6\u3768\6\u37bc\6\u3810\6\u3864\6\u38b8\6\u390c\6\u3960"+
-    "\6\u39b4\6\u3a08\6\u3a5c\6\u3ab0\6\u3b04\6\u3b58\6\u3bac\6\u3c00"+
-    "\6\u3c54\6\u3ca8\6\u3cfc\6\u3d50\6\u3da4\6\u3df8\6\u3e4c\6\u3ea0"+
-    "\6\u3ef4\6\u3f48\6\u3f9c\6\u3ff0\6\u4044\6\u4098\6\u40ec\6\u4140"+
-    "\6\u4194\6\u41e8\6\u423c\6\u4290\6\u42e4\6\u4338\6\u438c\6\u43e0"+
-    "\6\u4434\6\u4488\6\u44dc\6\u4530\6\u4584\6\u45d8\6\u462c\6\u4680"+
-    "\6\u46d4\6\u4728\6\u477c\6\u47d0\6\u4824\6\u4878\6\u48cc\6\u4920"+
-    "\

<TRUNCATED>

[14/24] lucene-solr:branch_8x: LUCENE-8527: Upgrade JFlex to 1.7.0. StandardTokenizer and UAX29URLEmailTokenizer now support Unicode 9.0, and provide UTS#51 v11.0 Emoji tokenization with the '<EMOJI>' token type.

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
index 292f2ad..e4b10af 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
@@ -37,12 +37,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
  *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
  *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
+ *   <li>&lt;EMOJI&gt;: A sequence of Emoji characters</li>
  * </ul>
  */
 @SuppressWarnings("fallthrough")
 %%
 
-%unicode 6.3
+%unicode 9.0
 %integer
 %final
 %public
@@ -52,22 +53,73 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %xstate AVOID_BAD_URL
 %buffer 255
 
-// UAX#29 WB4. X (Extend | Format)* --> X
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+
+//////////////////////////////////////////////////////////////////////////
+// Begin Emoji Macros - see documentation below, near the EMOJI_TYPE rule
+
+// TODO: Remove this include file when JFlex supports these properties directly (in Unicode 11.0+)
+%include ../../../../../../../../../core/src/data/jflex/UnicodeEmojiProperties.jflex
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
+//
+//   \uFE0E (Text Presentation Selector) and \uFE0F (Emoji Presentation Selector) - included in \p{WB:Extend}
+//   - are explicitly excluded here so that we can properly handle Emoji sequences.
+//
+ExtFmtZwjSansPresSel = [[\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]--[\uFE0E\uFE0F]]*
+
+KeyCapBaseChar = [0-9#*]
+KeyCapBaseCharEx = {KeyCapBaseChar} {ExtFmtZwjSansPresSel}
+KeyCap = \u20E3
+KeyCapEx = {KeyCap} {ExtFmtZwjSansPresSel}
+
+// # \u3030 = WAVY DASH; \u303D = PART ALTERNATION MARK
+AccidentalEmoji = [©®™\u3030\u303D]
+EmojiRKAM = ( \p{WB:Regional_Indicator} | {KeyCapBaseChar} | {AccidentalEmoji} | {Emoji_Modifier} )
+
+// Unlike Unicode properties, macros are not allowed in character classes, so we achieve set difference
+// by applying DeMorgan: the expression that matches everything of 'a' not matched by 'b' is: !(!a|b)
+// TODO: Convert this expression to character class difference when JFlex supports the properties directly (in Unicode 11.0+)
+EmojiSansRKAM = !( ! {Emoji} | {EmojiRKAM} )
+
+EmojiChar = ( {Extended_Pictographic} | {EmojiSansRKAM} )
+
+EmojiCharEx         = {EmojiChar}           {ExtFmtZwjSansPresSel}
+EmojiModifierBaseEx = {Emoji_Modifier_Base} {ExtFmtZwjSansPresSel}
+EmojiModifierEx     = {Emoji_Modifier}      {ExtFmtZwjSansPresSel}
+
+EmojiPresentationSelector = \uFE0F
+EmojiCharOrPresSeqOrModSeq = ( \p{WB:ZWJ}* {EmojiCharEx} {EmojiPresentationSelector}? ) | ( ( \p{WB:ZWJ}* {EmojiModifierBaseEx} )? {EmojiModifierEx} )
+TagSpec = [\u{E0020}-\u{E007E}]
+TagTerm = \u{E007F}
+
+// End Emoji Macros
+//////////////////////////////////////////////////////////////////////////
+
+
+// UAX#29 WB4.  X (Extend | Format | ZWJ)* --> X
 //
-HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx   = [\p{WB:HebrewLetter}\p{WB:ALetter}]                       [\p{WB:Format}\p{WB:Extend}]*
-NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx          = \p{WB:Katakana}                                           [\p{WB:Format}\p{WB:Extend}]* 
-MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      [\p{WB:Format}\p{WB:Extend}]* 
-MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       [\p{WB:Format}\p{WB:Extend}]*
-HanEx               = \p{Script:Han}                                            [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx          = \p{Script:Hiragana}                                       [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx       = \p{WB:Single_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx       = \p{WB:Double_Quote}                                       [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator}                                  [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx    = \p{LB:Complex_Context}                                    [\p{WB:Format}\p{WB:Extend}]*
+ExtFmtZwj           = [\p{WB:Format}\p{WB:Extend}\p{WB:ZWJ}]*
+
+HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] {ExtFmtZwj}
+AHLetterEx          = [\p{WB:ALetter}\p{WB:Hebrew_Letter}]                      {ExtFmtZwj}
+NumericEx           = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]]        {ExtFmtZwj}
+KatakanaEx          = \p{WB:Katakana}                                           {ExtFmtZwj} 
+MidLetterEx         = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}]      {ExtFmtZwj} 
+MidNumericEx        = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}]         {ExtFmtZwj}
+ExtendNumLetEx      = \p{WB:ExtendNumLet}                                       {ExtFmtZwj}
+HanEx               = \p{Script:Han}                                            {ExtFmtZwj}
+HiraganaEx          = \p{Script:Hiragana}                                       {ExtFmtZwj}
+SingleQuoteEx       = \p{WB:Single_Quote}                                       {ExtFmtZwj}
+DoubleQuoteEx       = \p{WB:Double_Quote}                                       {ExtFmtZwj}
+HebrewLetterEx      = \p{WB:Hebrew_Letter}                                      {ExtFmtZwj}
+RegionalIndicatorEx = \p{WB:Regional_Indicator}                                 {ExtFmtZwj}
+ComplexContextEx    = \p{LB:Complex_Context}                                    {ExtFmtZwj}
+
 
 // URL and E-mail syntax specifications:
 //
@@ -174,18 +226,28 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
    */
   public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
   
+  /** Ideographic token type */
   public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
   
+  /** Hiragana token type */
   public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;
   
+  /** Katakana token type */
   public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;
   
+  /** Hangul token type */
   public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;
   
+  /** Email token type */
   public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;
   
+  /** URL token type */
   public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;
 
+  /** Emoji token type */
+  public static final int EMOJI_TYPE = UAX29URLEmailTokenizer.EMOJI;
+
+  /** Character count processed so far */
   public final int yychar()
   {
     return yychar;
@@ -213,11 +275,11 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
 
 <YYINITIAL, AVOID_BAD_URL> {
 
-// UAX#29 WB1.   sot   ÷
-//        WB2.     ÷   eot
+// UAX#29 WB1.    sot ÷ Any
+//        WB2.    Any ÷ eot
 //
   <<EOF>> { return YYEOF; }
-
+  
   {URL}   { yybegin(YYINITIAL); return URL_TYPE; }
 
   // LUCENE-5391: Don't recognize no-scheme domain-only URLs with a following alphanumeric character
@@ -244,14 +306,61 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
 
   {EMAIL} { yybegin(YYINITIAL); return EMAIL_TYPE; }
 
-  // UAX#29 WB8.   Numeric × Numeric
-  //        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-  //        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-  //        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-  //        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+
+  // Instead of these: UAX#29 WB3c. ZWJ × (Glue_After_Zwj | EBG)
+  //                          WB14. (E_Base | EBG) × E_Modifier
+  //                          WB15. ^ (RI RI)* RI × RI
+  //                          WB16. [^RI] (RI RI)* RI × RI
+  //
+  // We use the "emoji_sequence" rule from http://www.unicode.org/reports/tr51/tr51-14.html (Unicode 11.0)
+  // and the Emoji data from http://unicode.org/Public/emoji/11.0/emoji-data.txt (in included file UnicodeEmojiProperties.jflex)
+  // 
+  // emoji_sequence :=
+  //    Top-level EBNF           Expanded #1                       Expanded #2                       Expanded #3
+  //    ---------------------    ----------------------------      -----------------------------     ----------------------------------------------
+  //      emoji_core_sequence      emoji_combining_sequence          emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_keycap_sequence           | [0-9#*] \u{FE0F 20E3}      [1]
+  //                             | emoji_modifier_sequence                                           | \p{Emoji_Modifier_Base} \p{Emoji_Modifier}
+  //                             | emoji_flag_sequence                                               | \p{WB:Regional_Indicator}{2}               )
+  //
+  //    | emoji_zwj_sequence       emoji_zwj_element                 emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+  //                             ( ZWJ emoji_zwj_element )+                                          ( \p{WB:ZWJ} ^^ )+
+  // 
+  //    | emoji_tag_sequence     tag_base                            emoji_character                 ( \p{Emoji}
+  //                                                               | emoji_presentation_sequence     | \p{Emoji} \uFE0F
+  //                                                               | emoji_modifier_sequence         | \p{Emoji_Modifier_Base} \p{Emoji_Modifier} )
+  //                             tag_spec                                                            [\u{E0020}-\u{E007E}]+
+  //                             tag_term                                                            \u{E007F}
+  //
+  // [1] https://unicode.org/Public/emoji/11.0/emoji-test.txt includes key cap sequences 
+  //     WITHOUT \uFE0F (emoji presentation indicator), annotating them as "non-fully-qualified";
+  //     TR#51 says about non-fully-qualified *ZWJ sequences* that implementations may
+  //     choose whether to support them for segmentation.  This implementation will
+  //     recognize /[0-9#*]\u20E3/ - i.e. without \uFE0F - as Emoji. 
+  //
+  // See also: http://www.unicode.org/L2/L2016/16315-handling-seg-emoji.pdf
+  //           https://docs.google.com/document/d/1yDZ5TUZNVVKaM9zYCCLbRIAKGNZANsAGl0bcNzGGvn8
+  //
+  //     In particular, the above docs recommend a modified UAX#29 WB3c rule (covered by TR#51's "emoji_zwj_sequence"):
+  //
+  //         WB3c′ ZWJ × (Extended_Pictographic | EmojiNRK)
+  //
+    {EmojiCharOrPresSeqOrModSeq} ( ( \p{WB:ZWJ} {EmojiCharOrPresSeqOrModSeq} )* | {TagSpec}+ {TagTerm} ) 
+  | {KeyCapBaseCharEx} {EmojiPresentationSelector}? {KeyCapEx} 
+  | {RegionalIndicatorEx}{2} 
+    { yybegin(YYINITIAL); return EMOJI_TYPE; }
+
+  // UAX#29 WB8.    Numeric × Numeric
+  //        WB11.   Numeric (MidNum | MidNumLetQ) × Numeric
+  //        WB12.   Numeric × (MidNum | MidNumLetQ) Numeric
+  //        WB13a.  (AHLetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+  //        WB13b.  ExtendNumLet × (AHLetter | Numeric | Katakana)
   //
   {ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
-    {  yybegin(YYINITIAL); return NUMERIC_TYPE; }
+    { yybegin(YYINITIAL); return NUMERIC_TYPE; }
 
   // subset of the below for typing purposes only!
   {HangulEx}+
@@ -260,32 +369,32 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   {KatakanaEx}+
     { yybegin(YYINITIAL); return KATAKANA_TYPE; }
 
-  // UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-  //        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-  //        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-  //        WB7a.  Hebrew_Letter × Single_Quote
-  //        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
-  //        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
-  //        WB9.   (ALetter | Hebrew_Letter) × Numeric
-  //        WB10.  Numeric × (ALetter | Hebrew_Letter)
-  //        WB13.  Katakana × Katakana
-  //        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-  //        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+  // UAX#29 WB5.    AHLetter × AHLetter
+  //        WB6.    AHLetter × (MidLetter | MidNumLetQ) AHLetter
+  //        WB7.    AHLetter (MidLetter | MidNumLetQ) × AHLetter
+  //        WB7a.   Hebrew_Letter × Single_Quote
+  //        WB7b.   Hebrew_Letter × Double_Quote Hebrew_Letter
+  //        WB7c.   Hebrew_Letter Double_Quote × Hebrew_Letter
+  //        WB9.    AHLetter × Numeric
+  //        WB10.   Numeric × AHLetter
+  //        WB13.   Katakana × Katakana
+  //        WB13a.  (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+  //        WB13b.  ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
   //
-  {ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                       | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+  {ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                       | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                        )+
                      )
-  ({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                       | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
+  ({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                        )*
+                     | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx} )
+                       | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}      )*
+                       | {AHLetterEx}        ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {AHLetterEx}     )*
                        )+
                      )
   )*
-  {ExtendNumLetEx}*
+  {ExtendNumLetEx}* 
     { yybegin(YYINITIAL); return WORD_TYPE; }
 
 
@@ -297,7 +406,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   //    annex.  That means that satisfactory treatment of languages like Chinese
   //    or Thai requires special handling.
   //
-  // In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
+  // In Unicode 9.0, only one character has the \p{Line_Break = Contingent_Break}
   // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
   //
   // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -310,18 +419,15 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
   //
   {ComplexContextEx}+ { yybegin(YYINITIAL); return SOUTH_EAST_ASIAN_TYPE; }
 
-  // UAX#29 WB14.  Any ÷ Any
+  // UAX#29 WB999.  Any ÷ Any
   //
   {HanEx} { yybegin(YYINITIAL); return IDEOGRAPHIC_TYPE; }
   {HiraganaEx} { yybegin(YYINITIAL); return HIRAGANA_TYPE; }
 
-
-  // UAX#29 WB3.   CR × LF
-  //        WB3a.  (Newline | CR | LF) ÷
-  //        WB3b.  ÷ (Newline | CR | LF)
-  //        WB13c. Regional_Indicator × Regional_Indicator
-  //        WB14.  Any ÷ Any
+  // UAX#29 WB3.    CR × LF
+  //        WB3a.   (Newline | CR | LF) ÷
+  //        WB3b.   ÷ (Newline | CR | LF)
+  //        WB999.  Any ÷ Any
   //
-  {RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
-    { yybegin(YYINITIAL); /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+  [^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, emoji or SE Asian -- ignore it. */ }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
index 7f9227f..9295e1c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.6.0 */
+/* The following code was generated by JFlex 1.7.0 */
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -341,7 +341,7 @@ class WikipediaTokenizerImpl {
 
   /* error messages for the codes above */
   private static final String ZZ_ERROR_MSG[] = {
-    "Unkown internal scanner error",
+    "Unknown internal scanner error",
     "Error: could not match input",
     "Error: pushback value was too large"
   };
@@ -419,11 +419,11 @@ class WikipediaTokenizerImpl {
   private int yycolumn;
 
   /** 
-   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
    */
   private boolean zzAtBOL = true;
 
-  /** zzAtEOF == true <=> the scanner is at the EOF */
+  /** zzAtEOF == true iff the scanner is at the EOF */
   private boolean zzAtEOF;
 
   /** denotes if the user-EOF-code has already been executed */
@@ -575,28 +575,29 @@ final void reset() {
     }
 
     /* fill the buffer with new input */
-    int requested = zzBuffer.length - zzEndRead;           
-    int totalRead = 0;
-    while (totalRead < requested) {
-      int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
-      if (numRead == -1) {
-        break;
-      }
-      totalRead += numRead;
-    }
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
 
-    if (totalRead > 0) {
-      zzEndRead += totalRead;
-      if (totalRead == requested) { /* possibly more input available */
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
         if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
           --zzEndRead;
           zzFinalHighSurrogate = 1;
         }
       }
+      /* potentially more input available */
       return false;
     }
 
-    // totalRead = 0: End of stream
+    /* numRead < 0 ==> end of stream */
     return true;
   }
 
@@ -820,199 +821,245 @@ final void reset() {
       // store back cached position
       zzMarkedPos = zzMarkedPosL;
 
-      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 1: 
-          { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 47: break;
-        case 2: 
-          { positionInc = 1; return ALPHANUM;
-          }
-        case 48: break;
-        case 3: 
-          { positionInc = 1; return CJ;
-          }
-        case 49: break;
-        case 4: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 50: break;
-        case 5: 
-          { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 51: break;
-        case 6: 
-          { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 52: break;
-        case 7: 
-          { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 53: break;
-        case 8: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
-          }
-        case 54: break;
-        case 9: 
-          { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
-          }
-        case 55: break;
-        case 10: 
-          { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 56: break;
-        case 11: 
-          { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 57: break;
-        case 12: 
-          { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
-          }
-        case 58: break;
-        case 13: 
-          { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 59: break;
-        case 14: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
-          }
-        case 60: break;
-        case 15: 
-          { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 61: break;
-        case 16: 
-          { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
-          }
-        case 62: break;
-        case 17: 
-          { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
-          }
-        case 63: break;
-        case 18: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
-          }
-        case 64: break;
-        case 19: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
-          }
-        case 65: break;
-        case 20: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 66: break;
-        case 21: 
-          { yybegin(STRING); return currentTokType;/*pipe*/
-          }
-        case 67: break;
-        case 22: 
-          { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 68: break;
-        case 23: 
-          { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 69: break;
-        case 24: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 70: break;
-        case 25: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 71: break;
-        case 26: 
-          { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 72: break;
-        case 27: 
-          { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 73: break;
-        case 28: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 74: break;
-        case 29: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 75: break;
-        case 30: 
-          { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 76: break;
-        case 31: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
-          }
-        case 77: break;
-        case 32: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 78: break;
-        case 33: 
-          { positionInc = 1; return APOSTROPHE;
-          }
-        case 79: break;
-        case 34: 
-          { positionInc = 1; return HOST;
-          }
-        case 80: break;
-        case 35: 
-          { positionInc = 1; return NUM;
-          }
-        case 81: break;
-        case 36: 
-          { positionInc = 1; return COMPANY;
-          }
-        case 82: break;
-        case 37: 
-          { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 83: break;
-        case 38: 
-          { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
-          }
-        case 84: break;
-        case 39: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
-          }
-        case 85: break;
-        case 40: 
-          { positionInc = 1; return ACRONYM;
-          }
-        case 86: break;
-        case 41: 
-          { positionInc = 1; return EMAIL;
-          }
-        case 87: break;
-        case 42: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
-          }
-        case 88: break;
-        case 43: 
-          { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
-          }
-        case 89: break;
-        case 44: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
-          }
-        case 90: break;
-        case 45: 
-          { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 91: break;
-        case 46: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
-          }
-        case 92: break;
-        default: 
-          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
-            zzAtEOF = true;
-            return YYEOF;
-          } 
-          else {
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+        return YYEOF;
+      }
+      else {
+        switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+          case 1: 
+            { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 47: break;
+          case 2: 
+            { positionInc = 1; return ALPHANUM;
+            } 
+            // fall through
+          case 48: break;
+          case 3: 
+            { positionInc = 1; return CJ;
+            } 
+            // fall through
+          case 49: break;
+          case 4: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 50: break;
+          case 5: 
+            { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 51: break;
+          case 6: 
+            { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 52: break;
+          case 7: 
+            { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 53: break;
+          case 8: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+            } 
+            // fall through
+          case 54: break;
+          case 9: 
+            { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+            } 
+            // fall through
+          case 55: break;
+          case 10: 
+            { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 56: break;
+          case 11: 
+            { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 57: break;
+          case 12: 
+            { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
+            } 
+            // fall through
+          case 58: break;
+          case 13: 
+            { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 59: break;
+          case 14: 
+            { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 60: break;
+          case 15: 
+            { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 61: break;
+          case 16: 
+            { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
+            } 
+            // fall through
+          case 62: break;
+          case 17: 
+            { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
+            } 
+            // fall through
+          case 63: break;
+          case 18: 
+            { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
+            } 
+            // fall through
+          case 64: break;
+          case 19: 
+            { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
+            } 
+            // fall through
+          case 65: break;
+          case 20: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 66: break;
+          case 21: 
+            { yybegin(STRING); return currentTokType;/*pipe*/
+            } 
+            // fall through
+          case 67: break;
+          case 22: 
+            { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 68: break;
+          case 23: 
+            { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 69: break;
+          case 24: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 70: break;
+          case 25: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 71: break;
+          case 26: 
+            { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 72: break;
+          case 27: 
+            { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 73: break;
+          case 28: 
+            { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 74: break;
+          case 29: 
+            { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 75: break;
+          case 30: 
+            { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 76: break;
+          case 31: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
+            } 
+            // fall through
+          case 77: break;
+          case 32: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 78: break;
+          case 33: 
+            { positionInc = 1; return APOSTROPHE;
+            } 
+            // fall through
+          case 79: break;
+          case 34: 
+            { positionInc = 1; return HOST;
+            } 
+            // fall through
+          case 80: break;
+          case 35: 
+            { positionInc = 1; return NUM;
+            } 
+            // fall through
+          case 81: break;
+          case 36: 
+            { positionInc = 1; return COMPANY;
+            } 
+            // fall through
+          case 82: break;
+          case 37: 
+            { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 83: break;
+          case 38: 
+            { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
+            } 
+            // fall through
+          case 84: break;
+          case 39: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
+            } 
+            // fall through
+          case 85: break;
+          case 40: 
+            { positionInc = 1; return ACRONYM;
+            } 
+            // fall through
+          case 86: break;
+          case 41: 
+            { positionInc = 1; return EMAIL;
+            } 
+            // fall through
+          case 87: break;
+          case 42: 
+            { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
+            } 
+            // fall through
+          case 88: break;
+          case 43: 
+            { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+            } 
+            // fall through
+          case 89: break;
+          case 44: 
+            { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 90: break;
+          case 45: 
+            { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 91: break;
+          case 46: 
+            { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+            } 
+            // fall through
+          case 92: break;
+          default:
             zzScanError(ZZ_NO_MATCH);
-          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
index cf6c65a..758d5d2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
@@ -499,7 +499,7 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
 
     String randomHtmlishString2 // Don't create a comment (disallow "<!--") and don't include a closing ">"
         = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--","__");
-    String unclosedAngleBangNonCDATA = "<!" + randomHtmlishString1 +"-[CDATA[";
+    String unclosedAngleBangNonCDATA = "<!" + randomHtmlishString2 +"-[CDATA[";
 
     String[] testGold = {
         "one<![CDATA[<one><two>three<four></four></two></one>]]>two",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
index b3b0ce1..507eb09 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailAnalyzer.java
@@ -361,14 +361,14 @@ public class TestUAX29URLEmailAnalyzer extends BaseTokenStreamTestCase {
 
     StringBuilder bToken = new StringBuilder();
     // exact max length:
-    for(int i=0;i<StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;i++) {
+    for(int i=0;i<UAX29URLEmailAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;i++) {
       bToken.append('b');
     }
 
     String bString = bToken.toString();
     // first bString is exact max default length; next one is 1 too long
     String input = "x " + bString + " " + bString + "b";
-    assertAnalyzesTo(a, input.toString(), new String[] {"x", bString, bString, "b"});
+    assertAnalyzesTo(a, input, new String[] {"x", bString, bString, "b"});
     a.close();
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
index d9d8381..76c5d55 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizer.java
@@ -467,7 +467,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
+    WordBreakTestUnicode_9_0_0 wordBreakTest = new WordBreakTestUnicode_9_0_0();
     wordBreakTest.test(a);
   }
   
@@ -545,6 +545,80 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
   }
 
 
+  /** Simple emoji: U+1F4A9 (PILE OF POO) alone and doubled; each occurrence is expected as its own EMOJI token. */
+  public void testEmoji() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩 💩💩",
+        new String[] { "💩", "💩", "💩" },
+        new String[] { "<EMOJI>", "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** Emoji ZWJ sequence (U+1F469 U+200D U+2764 U+FE0F U+200D U+1F469) is expected as a single EMOJI token. */
+  public void testEmojiSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👩‍❤️‍👩",
+        new String[] { "👩‍❤️‍👩" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** Emoji ZWJ sequence with Fitzpatrick modifier (U+1F468 U+1F3FC U+200D U+2695 U+FE0F) is expected as a single EMOJI token. */
+  public void testEmojiSequenceWithModifier() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "👨🏼‍⚕️",
+        new String[] { "👨🏼‍⚕️" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** Regional indicators: two adjacent U+1F1FA U+1F1F8 pairs are expected as two separate EMOJI tokens. */
+  public void testEmojiRegionalIndicator() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🇺🇸🇺🇸",
+        new String[] { "🇺🇸", "🇺🇸" },
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /**
+   * Variation sequences. The keycap sequences (base character, U+FE0F, U+20E3) are
+   * expected as single EMOJI tokens; the remaining cases pin the expected output when
+   * the text presentation selector U+FE0E follows the base character.
+   */
+  public void testEmojiVariationSequence() throws Exception {
+    // keycap sequences: U+0023 U+FE0F U+20E3 and U+0033 U+FE0F U+20E3
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#️⃣",
+        new String[] { "#️⃣" },
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3️⃣",
+        new String[] { "3️⃣",},
+        new String[] { "<EMOJI>" });
+
+    // text presentation sequences
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "#\uFE0E",
+        new String[] { },
+        new String[] { });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "3\uFE0E",  // \uFE0E is included in \p{WB:Extend}
+        new String[] { "3\uFE0E",},
+        new String[] { "<NUM>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E",     // \u2B55 = HEAVY BLACK CIRCLE
+        new String[] { "\u2B55",},
+        new String[] { "<EMOJI>" });
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u2B55\uFE0E\u200D\u2B55\uFE0E",
+        new String[] { "\u2B55", "\u200D\u2B55"},
+        new String[] { "<EMOJI>", "<EMOJI>" });
+  }
+
+  /** Emoji tag sequence (U+1F3F4 followed by tag characters U+E0067 U+E0062 U+E0065 U+E006E U+E0067 U+E007F) is expected as a single EMOJI token. */
+  public void testEmojiTagSequence() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "🏴󠁧󠁢󠁥󠁮󠁧󠁿",
+        new String[] { "🏴󠁧󠁢󠁥󠁮󠁧󠁿" },
+        new String[] { "<EMOJI>" });
+  }
+
+  /** Emoji (U+1F4A9) directly adjacent to other text is expected to split off as its own EMOJI token. */
+  public void testEmojiTokenization() throws Exception {
+    // simple emoji around latin
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "poo💩poo",
+        new String[] { "poo", "💩", "poo" },
+        new String[] { "<ALPHANUM>", "<EMOJI>", "<ALPHANUM>" });
+    // simple emoji around non-latin (U+4E2D, U+570B)
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "💩中國💩",
+        new String[] { "💩", "中", "國", "💩" },
+        new String[] { "<EMOJI>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<EMOJI>" });
+  }
+
+  /** Runs the {@code EmojiTokenizationTestUnicode_11_0} checks against {@code a}. */
+  public void testUnicodeEmojiTests() throws Exception {
+    new EmojiTokenizationTestUnicode_11_0().test(a);
+  }
+
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index 789fc5f..0dc3884 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -2388,7 +2388,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
 
   <!-- JFlex task -->
   <target name="-install-jflex" unless="jflex.loaded" depends="ivy-availability-check,ivy-configure">
-    <ivy:cachepath organisation="de.jflex" module="jflex" revision="1.6.0"
+    <ivy:cachepath organisation="de.jflex" module="jflex" revision="1.7.0"
                    inline="true" conf="default" transitive="true" pathid="jflex.classpath"/>
     <taskdef name="jflex" classname="jflex.anttask.JFlexTask" classpathref="jflex.classpath"/>
     <property name="jflex.loaded" value="true"/>
@@ -2645,7 +2645,11 @@ The following arguments can be provided to ant to alter its behaviour and target
     <attribute name="dir"/>
     <attribute name="name"/>
     <sequential>
-      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+      <!-- The default skeleton is specified here to work around a JFlex ant task bug:    -->
+      <!-- invocations with a non-default skeleton will cause following invocations to    -->
+      <!-- use the same skeleton, though not specified, unless the default is configured. -->
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on"
+             skeleton="${common.dir}/core/src/data/jflex/skeleton.default"/>
     </sequential>
   </macrodef>
 
@@ -2653,20 +2657,13 @@ The following arguments can be provided to ant to alter its behaviour and target
     <attribute name="dir"/>
     <attribute name="name"/>
     <sequential>
-      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
       <!-- LUCENE-5897: Disallow scanner buffer expansion -->
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
-                     replace="" flags="s" />
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on"
+             skeleton="${common.dir}/core/src/data/jflex/skeleton.disable.buffer.expansion.txt"/>
+      <!-- Since the ZZ_BUFFERSIZE declaration is generated rather than in the skeleton, we have to transform it here. -->
       <replaceregexp file="@{dir}/@{name}.java"
                      match="private static final int ZZ_BUFFERSIZE ="
                      replace="private int ZZ_BUFFERSIZE ="/>
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="int requested = zzBuffer.length - zzEndRead;"
-                     replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
-      <replaceregexp file="@{dir}/@{name}.java"
-                     match="(zzFinalHighSurrogate = 1;)(\r?\n)"
-                     replace="\1\2          if (totalRead == 1) { return true; }\2"/>
     </sequential>
   </macrodef>
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex b/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
new file mode 100644
index 0000000..c631dee
--- /dev/null
+++ b/lucene/core/src/data/jflex/UnicodeEmojiProperties.jflex
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file was automatically generated by getUnicodeEmojiProperties.pl
+// from: http://unicode.org/Public/emoji/11.0/emoji-data.txt 
+
+Emoji = [\u{23}\u{2A}\u{30}-\u{39}\u{A9}\u{AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2604}\u{260E}\u{2611}\u{2614}-\u{2615}\u{2618}\u{261D}\u{2620}\u{2622}-\u{2623}\u{2626}\u{262A}\u{262E}-\u{262F}\u{2638}-\u{263A}\u{2640}\u{2642}\u{2648}-\u{2653}\u{265F}-\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267E}-\u{267F}\u{2692}-\u{2697}\u{2699}\u{269B}-\u{269C}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26B0}-\u{26B1}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26C8}\u{26CE}-\u{26CF}\u{26D1}\u{26D3}-\u{26D4}\u{26E9}-\u{26EA}\u{26F0}-\u{26F5}\u{26F7}-\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270D}\u{270F}\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{
 2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E6}-\u{1F1FF}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}-\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F7}-\u{1F4FD}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F56F}-\u{1F570}\u{1F573}-\u{1F57A}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}-\u{1F596}\u{1F5A4}-\u{1F5A5}\u{1F5A8}\u{1F5B1}-\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5E8}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D2}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6EC}\u{1F6F0}\u{1F6F3}-\u{1F6F9}\u{1F910}-\u{1F93A}\u{1F93C}-\u{1F93E}\u{1F940}-\u{1F945}\u{1F947}-\u{1F970}\u{1F973}-\u{1F976}\u{1F97A}\u{1F97C}-\u{1F9A2}\u{1F9B0}-\u{1F9B9}\u{1F9C0}-\u{1F9C2}\u{1F9D0}-\u{1F9FF}]
+Emoji_Modifier = [\u{1F3FB}-\u{1F3FF}]
+Emoji_Modifier_Base = [\u{261D}\u{26F9}\u{270A}-\u{270D}\u{1F385}\u{1F3C2}-\u{1F3C4}\u{1F3C7}\u{1F3CA}-\u{1F3CC}\u{1F442}-\u{1F443}\u{1F446}-\u{1F450}\u{1F466}-\u{1F469}\u{1F46E}\u{1F470}-\u{1F478}\u{1F47C}\u{1F481}-\u{1F483}\u{1F485}-\u{1F487}\u{1F4AA}\u{1F574}-\u{1F575}\u{1F57A}\u{1F590}\u{1F595}-\u{1F596}\u{1F645}-\u{1F647}\u{1F64B}-\u{1F64F}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F6C0}\u{1F6CC}\u{1F918}-\u{1F91C}\u{1F91E}-\u{1F91F}\u{1F926}\u{1F930}-\u{1F939}\u{1F93D}-\u{1F93E}\u{1F9B5}-\u{1F9B6}\u{1F9B8}-\u{1F9B9}\u{1F9D1}-\u{1F9DD}]
+Extended_Pictographic = [\u{A9}\u{AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{2388}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2605}\u{2607}-\u{2612}\u{2614}-\u{2685}\u{2690}-\u{2705}\u{2708}-\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2767}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F000}-\u{1F0FF}\u{1F10D}-\u{1F10F}\u{1F12F}\u{1F16C}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1AD}-\u{1F1E5}\u{1F201}-\u{1F20F}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F23C}-\u{1F23F}\u{1F249}-\u{1F3FA}\u{1F400}-\u{1F53D}\u{1F546}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{1F774}-\u{1F77F}\u{1F7D5}-\u{1F7FF}\u{1F80C}-\u{1F80F}\u{1F848}-\u{1F84F}\u{1F85A}-\u{1F85F}\u{1F888}-\u{1F88F}\u{1F8AE
 }-\u{1F8FF}\u{1F90C}-\u{1F93A}\u{1F93C}-\u{1F945}\u{1F947}-\u{1FFFD}]
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl b/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
new file mode 100644
index 0000000..e818b64
--- /dev/null
+++ b/lucene/core/src/data/jflex/getUnicodeEmojiProperties.pl
@@ -0,0 +1,168 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+# Split the invocation path so the output file can be written alongside this script.
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+
+# The version is required and must be exactly of the form X.Y.  It is
+# interpolated verbatim into the download URL below, so the pattern is anchored
+# to reject values that merely contain something version-like.
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\A\d+\.\d+\z/) {
+    print STDERR "Usage: $script_name -v <version>\n";
+    print STDERR "\tversion must be of the form X.Y, e.g. 9.0\n"
+        if ($version);
+    exit 1;
+}
+my $emoji_data_url = "http://unicode.org/Public/emoji/$version/emoji-data.txt";
+my $output_filename = "UnicodeEmojiProperties.jflex";
+
+# Text written at the top of the generated file: license plus provenance.
+my $header =<<"__HEADER__";
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file was automatically generated by ${script_name}
+// from: ${emoji_data_url} 
+
+__HEADER__
+
+# Property name => array of alternating (start,end) code points, filled in by the parser.
+my $property_ranges = {};
+# Only lines for these properties are extracted from the data file.
+my $wanted_properties = { 'Emoji' => 1, 'Emoji_Modifier' => 1, 'Emoji_Modifier_Base' => 1, 'Extended_Pictographic' => 1 };
+
+parse_emoji_data_file($emoji_data_url, $property_ranges, $wanted_properties);
+
+# The include file is written into the directory this script lives in.
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+output_jflex_include_file($output_path, $property_ranges);
+
+
+# sub parse_emoji_data_file
+#
+# Downloads and parses the emoji-data.txt file, extracting the code point
+# ranges assigned to each wanted property.  The age field on each line is not
+# consulted: every data line for a wanted property contributes its range.
+# A range that touches or overlaps the previously recorded range for the same
+# property is merged into it; this relies on each property's lines appearing
+# in ascending code point order.
+#
+# Parameters:
+#
+#  - Emoji data file URL
+#  - Reference to hash of properties mapped to an array of alternating (start,end) code point ranges
+#  - Reference to hash of wanted property names
+#
+sub parse_emoji_data_file {
+    my ($url, $prop_ranges, $wanted_props) = @_;
+    my $content = get_URL_content($url);
+    print STDERR "Parsing '$url'...";
+    for my $line (split /\r?\n/, $content) {
+        ## 231A..231B    ; Emoji_Presentation   #  1.1  [2] (⌚..⌛)    watch..hourglass done
+        ## 1F9C0         ; Emoji_Presentation   #  8.0  [1] (🧀)       cheese wedge
+        ## 1FA00..1FA5F  ; Extended_Pictographic#   NA [96] (🨀️..🩟️)    <reserved-1FA00>..<reserved-1FA5F>
+        my ($start, $end, $prop)
+            = $line =~ /^([0-9A-F]{4,5})(?:\.\.([0-9A-F]{4,5}))?\s*;\s*([^\s#]+)/
+            or next;  # Not a data line (comment or blank)
+        next unless defined($wanted_props->{$prop});  # Skip unless we want ranges for this property
+
+        $end = $start unless defined($end);  # Single code point: start and end coincide
+        my $start_dec = hex $start;
+        my $end_dec = hex $end;
+        my $ranges = ($prop_ranges->{$prop} ||= []);
+        if (scalar(@$ranges) == 0 || $start_dec > $ranges->[-1] + 1) { # Can't merge range with previous range
+            push @$ranges, $start_dec, $end_dec;
+        } elsif ($end_dec > $ranges->[-1]) {
+            # Extend the previous range.  The guard keeps an entry that lies
+            # entirely inside the previous range from shrinking it.
+            $ranges->[-1] = $end_dec;
+        }
+    }
+    print STDERR "done.\n";
+}
+
+# sub get_URL_content
+#
+# Issues an HTTP GET for the given URL and returns the response content.
+# Progress is reported on STDERR.  If the response is not successful, the
+# status line is printed to STDERR and the script exits with status 1.
+#
+# Parameter:
+#
+#  - URL to get content for
+#
+sub get_URL_content {
+    my ($url) = @_;
+    print STDERR "Retrieving '$url'...";
+    my $response = LWP::UserAgent->new->request(HTTP::Request->new(GET => $url));
+    if (not $response->is_success) {
+        print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
+        exit 1;
+    }
+    print STDERR "done.\n";
+    return $response->content;
+}
+
+
+# sub output_jflex_include_file
+#
+# Writes the JFlex include file: the file-level $header text, then one
+# "Property = [...]" character class line per property in sorted property
+# name order, then a trailing blank line.  Dies if the file cannot be opened
+# or closed.
+#
+# Parameters:
+#
+#  - Output path
+#  - Reference to hash mapping properties to an array of alternating (start,end) codepoint ranges
+#
+sub output_jflex_include_file {
+    my ($path, $prop_ranges) = @_;
+    # Three-argument open with low-precedence "or": with "||" the die clause
+    # binds to the filename string, so a failed open would go unnoticed.
+    open(my $out, '>', $path)
+        or die "Error opening '$path' for writing: $!";
+
+    print STDERR "Writing '$path'...";
+
+    print $out $header;
+
+    for my $prop (sort keys %$prop_ranges) {
+        my $ranges = $prop_ranges->{$prop};
+        print $out "$prop = [";
+        # Ranges are stored flat as (start, end) pairs, hence the step of 2.
+        for (my $index = 0 ; $index < scalar(@$ranges) ; $index += 2) {
+            my ($first, $last) = @$ranges[$index, $index + 1];
+            printf $out "\\u{%X}", $first;
+            # A single code point is written alone, without a "-end" part.
+            printf $out "-\\u{%X}", $last if ($last > $first);
+        }
+        print $out "]\n";
+    }
+
+    print $out "\n";
+    # Buffered write errors only surface at close, so check it.
+    close($out)
+        or die "Error closing '$path': $!";
+    print STDERR "done.\n";
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/core/src/data/jflex/skeleton.default
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/skeleton.default b/lucene/core/src/data/jflex/skeleton.default
new file mode 100644
index 0000000..9e08fbb
--- /dev/null
+++ b/lucene/core/src/data/jflex/skeleton.default
@@ -0,0 +1,342 @@
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+--- private static final int ZZ_BUFFERSIZE = ...;
+
+  /** lexical states */
+---  lexical states, charmap
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unknown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+--- isFinal list
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true iff the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+  
+  /** 
+   * The number of occupied positions in zzBuffer beyond zzEndRead.
+   * When a lead/high surrogate has been read from the input stream
+   * into the final zzBuffer position, this will have a value of 1;
+   * otherwise, it will have a value of 0.
+   */
+  private int zzFinalHighSurrogate = 0;
+
+--- user class code
+
+--- constructor declaration
+
+
+  /**
+   * Refills the input buffer.
+   *
+   * @return      <code>false</code>, iff there was new input.
+   * 
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can) by moving the unconsumed chars to the buffer start */
+    if (zzStartRead > 0) {
+      zzEndRead += zzFinalHighSurrogate;  /* count a held-back high surrogate so it is copied too */
+      zzFinalHighSurrogate = 0;
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+    /* is the buffer big enough? */
+    if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) {
+      /* if not: double its capacity, keeping the current contents */
+      char newBuffer[] = new char[zzBuffer.length*2];
+      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+      zzBuffer = newBuffer;
+      zzEndRead += zzFinalHighSurrogate;
+      zzFinalHighSurrogate = 0;
+    }
+
+    /* fill the buffer with new input */
+    int requested = zzBuffer.length - zzEndRead;
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      /* If numRead == requested, we might have requested too few chars to
+         encode a full Unicode character. We assume that a Reader would
+         otherwise never return half characters. */
+      if (numRead == requested) {
+        if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+          --zzEndRead;               /* hide the trailing high surrogate; it is re-counted on the next refill */
+          zzFinalHighSurrogate = 1;
+        }
+      }
+      /* potentially more input available */
+      return false;
+    }
+
+    /* numRead < 0 ==> end of stream */
+    return true;
+  }
+
+    
+  /**
+   * Closes the input stream.
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    if (zzReader != null)      /* nothing to close when no reader is set */
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream 
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>YYINITIAL</tt>.
+   *
+   * Internal scan buffer is resized down to its initial length, if it has grown.
+   *
+   * @param reader   the new input stream 
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    zzEndRead = zzStartRead = 0;       /* empty buffer: nothing read, nothing matched */
+    zzCurrentPos = zzMarkedPos = 0;
+    zzFinalHighSurrogate = 0;          /* drop any held-back high surrogate of the old input */
+    yyline = yychar = yycolumn = 0;    /* restart the position counters */
+    zzLexicalState = YYINITIAL;
+    if (zzBuffer.length > ZZ_BUFFERSIZE)   /* shrink a grown buffer back to its initial size */
+      zzBuffer = new char[ZZ_BUFFERSIZE];
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   */
+  public final int yystate() {
+    return zzLexicalState;  /* initially YYINITIAL; changed by yybegin(int), reset by yyreset(Reader) */
+  }
+
+
+  /**
+   * Enters a new lexical state
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;  /* stored as-is; the value is not validated here */
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );  /* copies chars [zzStartRead, zzMarkedPos) */
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the 
+   * matched text. 
+   * 
+   * It is equivalent to yytext().charAt(pos), but faster
+   *
+   * @param pos the position of the character to fetch. 
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];  /* pos is not range-checked against yylength() */
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;  /* counted in chars (UTF-16 code units), as zzBuffer is a char[] */
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a well-formed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+--- zzScanError declaration
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+--- throws clause
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+--- yypushback decl (contains zzScanError exception)
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+--- zzDoEOF
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+--- yylex declaration
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+--- local declarations
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+--- start admin (line, char, col count)
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+--- start admin (lexstate etc)
+
+      zzForAction: {
+        while (true) {
+    
+--- next input, line, col, char count, next transition, isFinal action
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+--- line count update
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+--- char count update
+
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+--- eofvalue
+      }
+      else {
+--- actions
+          default:
+--- no match
+        }
+      }
+    }
+  }
+
+--- main
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0e903cab/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
----------------------------------------------------------------------
diff --git a/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
new file mode 100644
index 0000000..a9dabcf
--- /dev/null
+++ b/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
@@ -0,0 +1,348 @@
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+--- private static final int ZZ_BUFFERSIZE = ...;
+
+  /** lexical states */
+---  lexical states, charmap
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unknown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+--- isFinal list
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true iff the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true iff the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+  
+  /** 
+   * The number of occupied positions in zzBuffer beyond zzEndRead.
+   * When a lead/high surrogate has been read from the input stream
+   * into the final zzBuffer position, this will have a value of 1;
+   * otherwise, it will have a value of 0.
+   */
+  private int zzFinalHighSurrogate = 0;
+
+--- user class code
+
+--- constructor declaration
+
+/* -------------------------------------------------------------------------------- */
+/* Begin Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+
+  /**
+   * Refills the input buffer.
+   *
+   * @return      <code>false</code>, iff there was new input.
+   * 
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can) by moving the unconsumed chars to the buffer start */
+    if (zzStartRead > 0) {
+      zzEndRead += zzFinalHighSurrogate;  /* count a held-back high surrogate so it is copied too */
+      zzFinalHighSurrogate = 0;
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+
+    /* fill the buffer with new input; unlike skeleton.default, the buffer is never enlarged here */
+    int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
+    if (requested == 0) {     /* buffer is full: report "no new input" instead of growing it */
+      return true;
+    }
+    int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
+
+    /* not supposed to occur according to specification of java.io.Reader */
+    if (numRead == 0) {
+      throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround.");
+    }
+    if (numRead > 0) {
+      zzEndRead += numRead;
+      if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+        if (numRead == requested) { // We might have requested too few chars to encode a full Unicode character.
+          --zzEndRead;              // hide the trailing high surrogate; it is re-counted on the next refill
+          zzFinalHighSurrogate = 1;
+          if (numRead == 1) {       // only the hidden surrogate was read, so nothing usable was added
+            return true;
+          }
+        } else {                    // There is room in the buffer for at least one more char
+          int c = zzReader.read();  // Expecting to read a low surrogate char
+          if (c == -1) {            // stream ended right after the high surrogate
+            return true;
+          } else {
+            zzBuffer[zzEndRead++] = (char)c;
+            return false;
+          }
+        }
+      }
+      /* potentially more input available */
+      return false;
+    }
+
+    /* numRead < 0 ==> end of stream */
+    return true;
+  }
+
+/* End Lucene-specific disable-buffer-expansion modifications to skeleton.default */
+/* ------------------------------------------------------------------------------ */
+    
+  /**
+   * Closes the input stream.
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    if (zzReader != null)      /* nothing to close when no reader is set */
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream 
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>YYINITIAL</tt>.
+   *
+   * Internal scan buffer is resized down to its initial length, if it has grown.
+   *
+   * @param reader   the new input stream 
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    zzEndRead = zzStartRead = 0;       /* empty buffer: nothing read, nothing matched */
+    zzCurrentPos = zzMarkedPos = 0;
+    zzFinalHighSurrogate = 0;          /* drop any held-back high surrogate of the old input */
+    yyline = yychar = yycolumn = 0;    /* restart the position counters */
+    zzLexicalState = YYINITIAL;
+    if (zzBuffer.length > ZZ_BUFFERSIZE)   /* shrink a grown buffer back to its initial size */
+      zzBuffer = new char[ZZ_BUFFERSIZE];
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   */
+  public final int yystate() {
+    return zzLexicalState;  /* initially YYINITIAL; changed by yybegin(int), reset by yyreset(Reader) */
+  }
+
+
+  /**
+   * Enters a new lexical state
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;  /* stored as-is; the value is not validated here */
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );  /* copies chars [zzStartRead, zzMarkedPos) */
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the 
+   * matched text. 
+   * 
+   * It is equivalent to yytext().charAt(pos), but faster
+   *
+   * @param pos the position of the character to fetch. 
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];  /* pos is not range-checked against yylength() */
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;  /* counted in chars (UTF-16 code units), as zzBuffer is a char[] */
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a well-formed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+--- zzScanError declaration
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+--- throws clause
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+--- yypushback decl (contains zzScanError exception)
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+--- zzDoEOF
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+--- yylex declaration
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+--- local declarations
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+--- start admin (line, char, col count)
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+--- start admin (lexstate etc)
+
+      zzForAction: {
+        while (true) {
+    
+--- next input, line, col, char count, next transition, isFinal action
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+--- line count update
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+--- char count update
+
+      if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+        zzAtEOF = true;
+--- eofvalue
+      }
+      else {
+--- actions
+          default:
+--- no match
+        }
+      }
+    }
+  }
+
+--- main
+
+}