You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2011/12/12 15:19:18 UTC

[lucy-commits] svn commit: r1213252 [3/4] - in /incubator/lucy/trunk: core/Lucy/Analysis/ core/Lucy/Test/Analysis/ devel/bin/ devel/conf/ modules/unicode/ucd/ perl/ perl/buildlib/Lucy/ perl/lib/Lucy/ perl/lib/Lucy/Analysis/ perl/t/ perl/t/core/

Added: incubator/lucy/trunk/modules/unicode/ucd/WordBreakTest.json
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/modules/unicode/ucd/WordBreakTest.json?rev=1213252&view=auto
==============================================================================
--- incubator/lucy/trunk/modules/unicode/ucd/WordBreakTest.json (added)
+++ incubator/lucy/trunk/modules/unicode/ucd/WordBreakTest.json Mon Dec 12 14:19:17 2011
@@ -0,0 +1,5720 @@
+[
+   {
+      "text" : "\u0001\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\u0001\r",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈\r",
+      "words" : []
+   },
+   {
+      "text" : "\u0001\n",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈\n",
+      "words" : []
+   },
+   {
+      "text" : "\u0001\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\u0001〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\u0001̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\u0001A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\u0001̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\u0001:",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈:",
+      "words" : []
+   },
+   {
+      "text" : "\u0001,",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈,",
+      "words" : []
+   },
+   {
+      "text" : "\u0001'",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈'",
+      "words" : []
+   },
+   {
+      "text" : "\u00010",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\u0001̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\u0001_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\u0001̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\u0001­",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈­",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̀",
+      "words" : []
+   },
+   {
+      "text" : "\u0001̈̀",
+      "words" : []
+   },
+   {
+      "text" : "\u0001a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\u0001̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\u0001a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u0001̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u0001a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u0001̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u0001a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u0001̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u0001a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u0001̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u00011:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u0001̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u00011'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u0001̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u00011,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u0001̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u00011.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u0001̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\r̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\r\r",
+      "words" : []
+   },
+   {
+      "text" : "\r̈\r",
+      "words" : []
+   },
+   {
+      "text" : "\r\n",
+      "words" : []
+   },
+   {
+      "text" : "\r̈\n",
+      "words" : []
+   },
+   {
+      "text" : "\r\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\r̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\r〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\r̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\rA",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\r̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\r:",
+      "words" : []
+   },
+   {
+      "text" : "\r̈:",
+      "words" : []
+   },
+   {
+      "text" : "\r,",
+      "words" : []
+   },
+   {
+      "text" : "\r̈,",
+      "words" : []
+   },
+   {
+      "text" : "\r'",
+      "words" : []
+   },
+   {
+      "text" : "\r̈'",
+      "words" : []
+   },
+   {
+      "text" : "\r0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\r̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\r_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\r̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\r­",
+      "words" : []
+   },
+   {
+      "text" : "\r̈­",
+      "words" : []
+   },
+   {
+      "text" : "\r̀",
+      "words" : []
+   },
+   {
+      "text" : "\r̈̀",
+      "words" : []
+   },
+   {
+      "text" : "\ra⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\r̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\ra:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\r̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\ra'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\r̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\ra'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\r̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\ra,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\r̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\r1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\r̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\n̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\n\r",
+      "words" : []
+   },
+   {
+      "text" : "\n̈\r",
+      "words" : []
+   },
+   {
+      "text" : "\n\n",
+      "words" : []
+   },
+   {
+      "text" : "\n̈\n",
+      "words" : []
+   },
+   {
+      "text" : "\n\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\n̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\n〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\n̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\nA",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\n̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\n:",
+      "words" : []
+   },
+   {
+      "text" : "\n̈:",
+      "words" : []
+   },
+   {
+      "text" : "\n,",
+      "words" : []
+   },
+   {
+      "text" : "\n̈,",
+      "words" : []
+   },
+   {
+      "text" : "\n'",
+      "words" : []
+   },
+   {
+      "text" : "\n̈'",
+      "words" : []
+   },
+   {
+      "text" : "\n0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\n̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\n_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\n̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\n­",
+      "words" : []
+   },
+   {
+      "text" : "\n̈­",
+      "words" : []
+   },
+   {
+      "text" : "\n\u0300",
+      "words" : []
+   },
+   {
+      "text" : "\n̈̀",
+      "words" : []
+   },
+   {
+      "text" : "\na⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\n̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\na:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\n̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\na'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\n̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\na'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\n̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\na,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\n̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\n1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\n̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : "\u000b\r",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈\r",
+      "words" : []
+   },
+   {
+      "text" : "\u000b\n",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈\n",
+      "words" : []
+   },
+   {
+      "text" : "\u000b\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : "\u000b〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\u000b̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "\u000bA",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\u000b̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "\u000b:",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈:",
+      "words" : []
+   },
+   {
+      "text" : "\u000b,",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈,",
+      "words" : []
+   },
+   {
+      "text" : "\u000b'",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈'",
+      "words" : []
+   },
+   {
+      "text" : "\u000b0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\u000b̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "\u000b_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\u000b̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "\u000b­",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈­",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̀",
+      "words" : []
+   },
+   {
+      "text" : "\u000b̈̀",
+      "words" : []
+   },
+   {
+      "text" : "\u000ba⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\u000b̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "\u000ba:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000b̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000ba'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000b̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000ba'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000b̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000ba,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000b̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "\u000b1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "\u000b̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "〱\u0001",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "〱̈\u0001",
+      "words" : [
+         "〱̈"
+      ]
+   },
+   {
+      "text" : "〱\r",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "〱̈\r",
+      "words" : [
+         "〱̈"
+      ]
+   },
+   {
+      "text" : "〱\n",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "〱̈\n",
+      "words" : [
+         "〱̈"
+      ]
+   },
+   {
+      "text" : "〱\u000b",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "〱̈\u000b",
+      "words" : [
+         "〱̈"
+      ]
+   },
+   {
+      "text" : "〱〱",
+      "words" : [
+         "〱〱"
+      ]
+   },
+   {
+      "text" : "〱̈〱",
+      "words" : [
+         "〱̈〱"
+      ]
+   },
+   {
+      "text" : "〱A",
+      "words" : [
+         "〱",
+         "A"
+      ]
+   },
+   {
+      "text" : "〱̈A",
+      "words" : [
+         "〱̈",
+         "A"
+      ]
+   },
+   {
+      "text" : "〱:",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "〱̈:",
+      "words" : [
+         "〱̈"
+      ]
+   },
+   {
+      "text" : "〱,",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "〱̈,",
+      "words" : [
+         "〱̈"
+      ]
+   },
+   {
+      "text" : "〱'",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "〱̈'",
+      "words" : [
+         "〱̈"
+      ]
+   },
+   {
+      "text" : "〱0",
+      "words" : [
+         "〱",
+         "0"
+      ]
+   },
+   {
+      "text" : "〱̈0",
+      "words" : [
+         "〱̈",
+         "0"
+      ]
+   },
+   {
+      "text" : "〱_",
+      "words" : [
+         "〱_"
+      ]
+   },
+   {
+      "text" : "〱̈_",
+      "words" : [
+         "〱̈_"
+      ]
+   },
+   {
+      "text" : "〱­",
+      "words" : [
+         "〱­"
+      ]
+   },
+   {
+      "text" : "〱̈­",
+      "words" : [
+         "〱̈­"
+      ]
+   },
+   {
+      "text" : "〱̀",
+      "words" : [
+         "〱̀"
+      ]
+   },
+   {
+      "text" : "〱̈̀",
+      "words" : [
+         "〱̈̀"
+      ]
+   },
+   {
+      "text" : "〱a⁠",
+      "words" : [
+         "〱",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "〱̈a⁠",
+      "words" : [
+         "〱̈",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "〱a:",
+      "words" : [
+         "〱",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱̈a:",
+      "words" : [
+         "〱̈",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱a'",
+      "words" : [
+         "〱",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱̈a'",
+      "words" : [
+         "〱̈",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱a'⁠",
+      "words" : [
+         "〱",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱̈a'⁠",
+      "words" : [
+         "〱̈",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱a,",
+      "words" : [
+         "〱",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱̈a,",
+      "words" : [
+         "〱̈",
+         "a"
+      ]
+   },
+   {
+      "text" : "〱1:",
+      "words" : [
+         "〱",
+         "1"
+      ]
+   },
+   {
+      "text" : "〱̈1:",
+      "words" : [
+         "〱̈",
+         "1"
+      ]
+   },
+   {
+      "text" : "〱1'",
+      "words" : [
+         "〱",
+         "1"
+      ]
+   },
+   {
+      "text" : "〱̈1'",
+      "words" : [
+         "〱̈",
+         "1"
+      ]
+   },
+   {
+      "text" : "〱1,",
+      "words" : [
+         "〱",
+         "1"
+      ]
+   },
+   {
+      "text" : "〱̈1,",
+      "words" : [
+         "〱̈",
+         "1"
+      ]
+   },
+   {
+      "text" : "〱1.⁠",
+      "words" : [
+         "〱",
+         "1"
+      ]
+   },
+   {
+      "text" : "〱̈1.⁠",
+      "words" : [
+         "〱̈",
+         "1"
+      ]
+   },
+   {
+      "text" : "A\u0001",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "Ä\u0001",
+      "words" : [
+         "Ä"
+      ]
+   },
+   {
+      "text" : "A\r",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "Ä\r",
+      "words" : [
+         "Ä"
+      ]
+   },
+   {
+      "text" : "A\n",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "Ä\n",
+      "words" : [
+         "Ä"
+      ]
+   },
+   {
+      "text" : "A\u000b",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "Ä\u000b",
+      "words" : [
+         "Ä"
+      ]
+   },
+   {
+      "text" : "A〱",
+      "words" : [
+         "A",
+         "〱"
+      ]
+   },
+   {
+      "text" : "Ä〱",
+      "words" : [
+         "Ä",
+         "〱"
+      ]
+   },
+   {
+      "text" : "AA",
+      "words" : [
+         "AA"
+      ]
+   },
+   {
+      "text" : "ÄA",
+      "words" : [
+         "ÄA"
+      ]
+   },
+   {
+      "text" : "A:",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "Ä:",
+      "words" : [
+         "Ä"
+      ]
+   },
+   {
+      "text" : "A,",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "Ä,",
+      "words" : [
+         "Ä"
+      ]
+   },
+   {
+      "text" : "A'",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "Ä'",
+      "words" : [
+         "Ä"
+      ]
+   },
+   {
+      "text" : "A0",
+      "words" : [
+         "A0"
+      ]
+   },
+   {
+      "text" : "Ä0",
+      "words" : [
+         "Ä0"
+      ]
+   },
+   {
+      "text" : "A_",
+      "words" : [
+         "A_"
+      ]
+   },
+   {
+      "text" : "Ä_",
+      "words" : [
+         "Ä_"
+      ]
+   },
+   {
+      "text" : "A­",
+      "words" : [
+         "A­"
+      ]
+   },
+   {
+      "text" : "Ä­",
+      "words" : [
+         "Ä­"
+      ]
+   },
+   {
+      "text" : "À",
+      "words" : [
+         "À"
+      ]
+   },
+   {
+      "text" : "Ä̀",
+      "words" : [
+         "Ä̀"
+      ]
+   },
+   {
+      "text" : "Aa⁠",
+      "words" : [
+         "Aa⁠"
+      ]
+   },
+   {
+      "text" : "Äa⁠",
+      "words" : [
+         "Äa⁠"
+      ]
+   },
+   {
+      "text" : "Aa:",
+      "words" : [
+         "Aa"
+      ]
+   },
+   {
+      "text" : "Äa:",
+      "words" : [
+         "Äa"
+      ]
+   },
+   {
+      "text" : "Aa'",
+      "words" : [
+         "Aa"
+      ]
+   },
+   {
+      "text" : "Äa'",
+      "words" : [
+         "Äa"
+      ]
+   },
+   {
+      "text" : "Aa'⁠",
+      "words" : [
+         "Aa"
+      ]
+   },
+   {
+      "text" : "Äa'⁠",
+      "words" : [
+         "Äa"
+      ]
+   },
+   {
+      "text" : "Aa,",
+      "words" : [
+         "Aa"
+      ]
+   },
+   {
+      "text" : "Äa,",
+      "words" : [
+         "Äa"
+      ]
+   },
+   {
+      "text" : "A1:",
+      "words" : [
+         "A1"
+      ]
+   },
+   {
+      "text" : "Ä1:",
+      "words" : [
+         "Ä1"
+      ]
+   },
+   {
+      "text" : "A1'",
+      "words" : [
+         "A1"
+      ]
+   },
+   {
+      "text" : "Ä1'",
+      "words" : [
+         "Ä1"
+      ]
+   },
+   {
+      "text" : "A1,",
+      "words" : [
+         "A1"
+      ]
+   },
+   {
+      "text" : "Ä1,",
+      "words" : [
+         "Ä1"
+      ]
+   },
+   {
+      "text" : "A1.⁠",
+      "words" : [
+         "A1"
+      ]
+   },
+   {
+      "text" : "Ä1.⁠",
+      "words" : [
+         "Ä1"
+      ]
+   },
+   {
+      "text" : ":\u0001",
+      "words" : []
+   },
+   {
+      "text" : ":̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : ":\r",
+      "words" : []
+   },
+   {
+      "text" : ":̈\r",
+      "words" : []
+   },
+   {
+      "text" : ":\n",
+      "words" : []
+   },
+   {
+      "text" : ":̈\n",
+      "words" : []
+   },
+   {
+      "text" : ":\u000b",
+      "words" : []
+   },
+   {
+      "text" : ":̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : ":〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : ":̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : ":A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : ":̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "::",
+      "words" : []
+   },
+   {
+      "text" : ":̈:",
+      "words" : []
+   },
+   {
+      "text" : ":,",
+      "words" : []
+   },
+   {
+      "text" : ":̈,",
+      "words" : []
+   },
+   {
+      "text" : ":'",
+      "words" : []
+   },
+   {
+      "text" : ":̈'",
+      "words" : []
+   },
+   {
+      "text" : ":0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : ":̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : ":_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : ":̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : ":­",
+      "words" : []
+   },
+   {
+      "text" : ":̈­",
+      "words" : []
+   },
+   {
+      "text" : ":̀",
+      "words" : []
+   },
+   {
+      "text" : ":̈̀",
+      "words" : []
+   },
+   {
+      "text" : ":a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : ":̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : ":a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ":1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ":̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ":1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ":̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ":1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ":̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ":1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ":̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",\u0001",
+      "words" : []
+   },
+   {
+      "text" : ",̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : ",\r",
+      "words" : []
+   },
+   {
+      "text" : ",̈\r",
+      "words" : []
+   },
+   {
+      "text" : ",\n",
+      "words" : []
+   },
+   {
+      "text" : ",̈\n",
+      "words" : []
+   },
+   {
+      "text" : ",\u000b",
+      "words" : []
+   },
+   {
+      "text" : ",̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : ",〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : ",̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : ",A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : ",̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : ",:",
+      "words" : []
+   },
+   {
+      "text" : ",̈:",
+      "words" : []
+   },
+   {
+      "text" : ",,",
+      "words" : []
+   },
+   {
+      "text" : ",̈,",
+      "words" : []
+   },
+   {
+      "text" : ",'",
+      "words" : []
+   },
+   {
+      "text" : ",̈'",
+      "words" : []
+   },
+   {
+      "text" : ",0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : ",̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : ",_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : ",̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : ",­",
+      "words" : []
+   },
+   {
+      "text" : ",̈­",
+      "words" : []
+   },
+   {
+      "text" : ",̀",
+      "words" : []
+   },
+   {
+      "text" : ",̈̀",
+      "words" : []
+   },
+   {
+      "text" : ",a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : ",̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : ",a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : ",1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : ",̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'\u0001",
+      "words" : []
+   },
+   {
+      "text" : "'̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : "'\r",
+      "words" : []
+   },
+   {
+      "text" : "'̈\r",
+      "words" : []
+   },
+   {
+      "text" : "'\n",
+      "words" : []
+   },
+   {
+      "text" : "'̈\n",
+      "words" : []
+   },
+   {
+      "text" : "'\u000b",
+      "words" : []
+   },
+   {
+      "text" : "'̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : "'〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "'̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "'A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "'̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "':",
+      "words" : []
+   },
+   {
+      "text" : "'̈:",
+      "words" : []
+   },
+   {
+      "text" : "',",
+      "words" : []
+   },
+   {
+      "text" : "'̈,",
+      "words" : []
+   },
+   {
+      "text" : "''",
+      "words" : []
+   },
+   {
+      "text" : "'̈'",
+      "words" : []
+   },
+   {
+      "text" : "'0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "'̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "'_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "'̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "'­",
+      "words" : []
+   },
+   {
+      "text" : "'̈­",
+      "words" : []
+   },
+   {
+      "text" : "'̀",
+      "words" : []
+   },
+   {
+      "text" : "'̈̀",
+      "words" : []
+   },
+   {
+      "text" : "'a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "'̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "'a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "'1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "'̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "0\u0001",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "0̈\u0001",
+      "words" : [
+         "0̈"
+      ]
+   },
+   {
+      "text" : "0\r",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "0̈\r",
+      "words" : [
+         "0̈"
+      ]
+   },
+   {
+      "text" : "0\n",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "0̈\n",
+      "words" : [
+         "0̈"
+      ]
+   },
+   {
+      "text" : "0\u000b",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "0̈\u000b",
+      "words" : [
+         "0̈"
+      ]
+   },
+   {
+      "text" : "0〱",
+      "words" : [
+         "0",
+         "〱"
+      ]
+   },
+   {
+      "text" : "0̈〱",
+      "words" : [
+         "0̈",
+         "〱"
+      ]
+   },
+   {
+      "text" : "0A",
+      "words" : [
+         "0A"
+      ]
+   },
+   {
+      "text" : "0̈A",
+      "words" : [
+         "0̈A"
+      ]
+   },
+   {
+      "text" : "0:",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "0̈:",
+      "words" : [
+         "0̈"
+      ]
+   },
+   {
+      "text" : "0,",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "0̈,",
+      "words" : [
+         "0̈"
+      ]
+   },
+   {
+      "text" : "0'",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "0̈'",
+      "words" : [
+         "0̈"
+      ]
+   },
+   {
+      "text" : "00",
+      "words" : [
+         "00"
+      ]
+   },
+   {
+      "text" : "0̈0",
+      "words" : [
+         "0̈0"
+      ]
+   },
+   {
+      "text" : "0_",
+      "words" : [
+         "0_"
+      ]
+   },
+   {
+      "text" : "0̈_",
+      "words" : [
+         "0̈_"
+      ]
+   },
+   {
+      "text" : "0­",
+      "words" : [
+         "0­"
+      ]
+   },
+   {
+      "text" : "0̈­",
+      "words" : [
+         "0̈­"
+      ]
+   },
+   {
+      "text" : "0̀",
+      "words" : [
+         "0̀"
+      ]
+   },
+   {
+      "text" : "0̈̀",
+      "words" : [
+         "0̈̀"
+      ]
+   },
+   {
+      "text" : "0a⁠",
+      "words" : [
+         "0a⁠"
+      ]
+   },
+   {
+      "text" : "0̈a⁠",
+      "words" : [
+         "0̈a⁠"
+      ]
+   },
+   {
+      "text" : "0a:",
+      "words" : [
+         "0a"
+      ]
+   },
+   {
+      "text" : "0̈a:",
+      "words" : [
+         "0̈a"
+      ]
+   },
+   {
+      "text" : "0a'",
+      "words" : [
+         "0a"
+      ]
+   },
+   {
+      "text" : "0̈a'",
+      "words" : [
+         "0̈a"
+      ]
+   },
+   {
+      "text" : "0a'⁠",
+      "words" : [
+         "0a"
+      ]
+   },
+   {
+      "text" : "0̈a'⁠",
+      "words" : [
+         "0̈a"
+      ]
+   },
+   {
+      "text" : "0a,",
+      "words" : [
+         "0a"
+      ]
+   },
+   {
+      "text" : "0̈a,",
+      "words" : [
+         "0̈a"
+      ]
+   },
+   {
+      "text" : "01:",
+      "words" : [
+         "01"
+      ]
+   },
+   {
+      "text" : "0̈1:",
+      "words" : [
+         "0̈1"
+      ]
+   },
+   {
+      "text" : "01'",
+      "words" : [
+         "01"
+      ]
+   },
+   {
+      "text" : "0̈1'",
+      "words" : [
+         "0̈1"
+      ]
+   },
+   {
+      "text" : "01,",
+      "words" : [
+         "01"
+      ]
+   },
+   {
+      "text" : "0̈1,",
+      "words" : [
+         "0̈1"
+      ]
+   },
+   {
+      "text" : "01.⁠",
+      "words" : [
+         "01"
+      ]
+   },
+   {
+      "text" : "0̈1.⁠",
+      "words" : [
+         "0̈1"
+      ]
+   },
+   {
+      "text" : "_\u0001",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "_̈\u0001",
+      "words" : [
+         "_̈"
+      ]
+   },
+   {
+      "text" : "_\r",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "_̈\r",
+      "words" : [
+         "_̈"
+      ]
+   },
+   {
+      "text" : "_\n",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "_̈\n",
+      "words" : [
+         "_̈"
+      ]
+   },
+   {
+      "text" : "_\u000b",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "_̈\u000b",
+      "words" : [
+         "_̈"
+      ]
+   },
+   {
+      "text" : "_〱",
+      "words" : [
+         "_〱"
+      ]
+   },
+   {
+      "text" : "_̈〱",
+      "words" : [
+         "_̈〱"
+      ]
+   },
+   {
+      "text" : "_A",
+      "words" : [
+         "_A"
+      ]
+   },
+   {
+      "text" : "_̈A",
+      "words" : [
+         "_̈A"
+      ]
+   },
+   {
+      "text" : "_:",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "_̈:",
+      "words" : [
+         "_̈"
+      ]
+   },
+   {
+      "text" : "_,",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "_̈,",
+      "words" : [
+         "_̈"
+      ]
+   },
+   {
+      "text" : "_'",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "_̈'",
+      "words" : [
+         "_̈"
+      ]
+   },
+   {
+      "text" : "_0",
+      "words" : [
+         "_0"
+      ]
+   },
+   {
+      "text" : "_̈0",
+      "words" : [
+         "_̈0"
+      ]
+   },
+   {
+      "text" : "__",
+      "words" : [
+         "__"
+      ]
+   },
+   {
+      "text" : "_̈_",
+      "words" : [
+         "_̈_"
+      ]
+   },
+   {
+      "text" : "_­",
+      "words" : [
+         "_­"
+      ]
+   },
+   {
+      "text" : "_̈­",
+      "words" : [
+         "_̈­"
+      ]
+   },
+   {
+      "text" : "_̀",
+      "words" : [
+         "_̀"
+      ]
+   },
+   {
+      "text" : "_̈̀",
+      "words" : [
+         "_̈̀"
+      ]
+   },
+   {
+      "text" : "_a⁠",
+      "words" : [
+         "_a⁠"
+      ]
+   },
+   {
+      "text" : "_̈a⁠",
+      "words" : [
+         "_̈a⁠"
+      ]
+   },
+   {
+      "text" : "_a:",
+      "words" : [
+         "_a"
+      ]
+   },
+   {
+      "text" : "_̈a:",
+      "words" : [
+         "_̈a"
+      ]
+   },
+   {
+      "text" : "_a'",
+      "words" : [
+         "_a"
+      ]
+   },
+   {
+      "text" : "_̈a'",
+      "words" : [
+         "_̈a"
+      ]
+   },
+   {
+      "text" : "_a'⁠",
+      "words" : [
+         "_a"
+      ]
+   },
+   {
+      "text" : "_̈a'⁠",
+      "words" : [
+         "_̈a"
+      ]
+   },
+   {
+      "text" : "_a,",
+      "words" : [
+         "_a"
+      ]
+   },
+   {
+      "text" : "_̈a,",
+      "words" : [
+         "_̈a"
+      ]
+   },
+   {
+      "text" : "_1:",
+      "words" : [
+         "_1"
+      ]
+   },
+   {
+      "text" : "_̈1:",
+      "words" : [
+         "_̈1"
+      ]
+   },
+   {
+      "text" : "_1'",
+      "words" : [
+         "_1"
+      ]
+   },
+   {
+      "text" : "_̈1'",
+      "words" : [
+         "_̈1"
+      ]
+   },
+   {
+      "text" : "_1,",
+      "words" : [
+         "_1"
+      ]
+   },
+   {
+      "text" : "_̈1,",
+      "words" : [
+         "_̈1"
+      ]
+   },
+   {
+      "text" : "_1.⁠",
+      "words" : [
+         "_1"
+      ]
+   },
+   {
+      "text" : "_̈1.⁠",
+      "words" : [
+         "_̈1"
+      ]
+   },
+   {
+      "text" : "­\u0001",
+      "words" : []
+   },
+   {
+      "text" : "­̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : "­\r",
+      "words" : []
+   },
+   {
+      "text" : "­̈\r",
+      "words" : []
+   },
+   {
+      "text" : "­\n",
+      "words" : []
+   },
+   {
+      "text" : "­̈\n",
+      "words" : []
+   },
+   {
+      "text" : "­\u000b",
+      "words" : []
+   },
+   {
+      "text" : "­̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : "­〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "­̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "­A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "­̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "­:",
+      "words" : []
+   },
+   {
+      "text" : "­̈:",
+      "words" : []
+   },
+   {
+      "text" : "­,",
+      "words" : []
+   },
+   {
+      "text" : "­̈,",
+      "words" : []
+   },
+   {
+      "text" : "­'",
+      "words" : []
+   },
+   {
+      "text" : "­̈'",
+      "words" : []
+   },
+   {
+      "text" : "­0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "­̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "­_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "­̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "­­",
+      "words" : []
+   },
+   {
+      "text" : "­̈­",
+      "words" : []
+   },
+   {
+      "text" : "­̀",
+      "words" : []
+   },
+   {
+      "text" : "­̈̀",
+      "words" : []
+   },
+   {
+      "text" : "­a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "­̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "­a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "­1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "­̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "­1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "­̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "­1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "­̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "­1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "­̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀\u0001",
+      "words" : []
+   },
+   {
+      "text" : "̀̈\u0001",
+      "words" : []
+   },
+   {
+      "text" : "̀\r",
+      "words" : []
+   },
+   {
+      "text" : "̀̈\r",
+      "words" : []
+   },
+   {
+      "text" : "̀\n",
+      "words" : []
+   },
+   {
+      "text" : "̀̈\n",
+      "words" : []
+   },
+   {
+      "text" : "̀\u000b",
+      "words" : []
+   },
+   {
+      "text" : "̀̈\u000b",
+      "words" : []
+   },
+   {
+      "text" : "̀〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "̀̈〱",
+      "words" : [
+         "〱"
+      ]
+   },
+   {
+      "text" : "̀A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "̀̈A",
+      "words" : [
+         "A"
+      ]
+   },
+   {
+      "text" : "̀:",
+      "words" : []
+   },
+   {
+      "text" : "̀̈:",
+      "words" : []
+   },
+   {
+      "text" : "̀,",
+      "words" : []
+   },
+   {
+      "text" : "̀̈,",
+      "words" : []
+   },
+   {
+      "text" : "̀'",
+      "words" : []
+   },
+   {
+      "text" : "̀̈'",
+      "words" : []
+   },
+   {
+      "text" : "̀0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "̀̈0",
+      "words" : [
+         "0"
+      ]
+   },
+   {
+      "text" : "̀_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "̀̈_",
+      "words" : [
+         "_"
+      ]
+   },
+   {
+      "text" : "̀­",
+      "words" : []
+   },
+   {
+      "text" : "̀̈­",
+      "words" : []
+   },
+   {
+      "text" : "̀̀",
+      "words" : []
+   },
+   {
+      "text" : "̀̈̀",
+      "words" : []
+   },
+   {
+      "text" : "̀a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "̀̈a⁠",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "̀a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀̈a:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀̈a'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀̈a'⁠",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀̈a,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "̀1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀̈1:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀̈1'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀̈1,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "̀̈1.⁠",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "a⁠\u0001",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈\u0001",
+      "words" : [
+         "a⁠̈"
+      ]
+   },
+   {
+      "text" : "a⁠\r",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈\r",
+      "words" : [
+         "a⁠̈"
+      ]
+   },
+   {
+      "text" : "a⁠\n",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈\n",
+      "words" : [
+         "a⁠̈"
+      ]
+   },
+   {
+      "text" : "a⁠\u000b",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈\u000b",
+      "words" : [
+         "a⁠̈"
+      ]
+   },
+   {
+      "text" : "a⁠〱",
+      "words" : [
+         "a⁠",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a⁠̈〱",
+      "words" : [
+         "a⁠̈",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a⁠A",
+      "words" : [
+         "a⁠A"
+      ]
+   },
+   {
+      "text" : "a⁠̈A",
+      "words" : [
+         "a⁠̈A"
+      ]
+   },
+   {
+      "text" : "a⁠:",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈:",
+      "words" : [
+         "a⁠̈"
+      ]
+   },
+   {
+      "text" : "a⁠,",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈,",
+      "words" : [
+         "a⁠̈"
+      ]
+   },
+   {
+      "text" : "a⁠'",
+      "words" : [
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈'",
+      "words" : [
+         "a⁠̈"
+      ]
+   },
+   {
+      "text" : "a⁠0",
+      "words" : [
+         "a⁠0"
+      ]
+   },
+   {
+      "text" : "a⁠̈0",
+      "words" : [
+         "a⁠̈0"
+      ]
+   },
+   {
+      "text" : "a⁠_",
+      "words" : [
+         "a⁠_"
+      ]
+   },
+   {
+      "text" : "a⁠̈_",
+      "words" : [
+         "a⁠̈_"
+      ]
+   },
+   {
+      "text" : "a⁠­",
+      "words" : [
+         "a⁠­"
+      ]
+   },
+   {
+      "text" : "a⁠̈­",
+      "words" : [
+         "a⁠̈­"
+      ]
+   },
+   {
+      "text" : "a⁠̀",
+      "words" : [
+         "a⁠̀"
+      ]
+   },
+   {
+      "text" : "a⁠̈̀",
+      "words" : [
+         "a⁠̈̀"
+      ]
+   },
+   {
+      "text" : "a⁠a⁠",
+      "words" : [
+         "a⁠a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠̈a⁠",
+      "words" : [
+         "a⁠̈a⁠"
+      ]
+   },
+   {
+      "text" : "a⁠a:",
+      "words" : [
+         "a⁠a"
+      ]
+   },
+   {
+      "text" : "a⁠̈a:",
+      "words" : [
+         "a⁠̈a"
+      ]
+   },
+   {
+      "text" : "a⁠a'",
+      "words" : [
+         "a⁠a"
+      ]
+   },
+   {
+      "text" : "a⁠̈a'",
+      "words" : [
+         "a⁠̈a"
+      ]
+   },
+   {
+      "text" : "a⁠a'⁠",
+      "words" : [
+         "a⁠a"
+      ]
+   },
+   {
+      "text" : "a⁠̈a'⁠",
+      "words" : [
+         "a⁠̈a"
+      ]
+   },
+   {
+      "text" : "a⁠a,",
+      "words" : [
+         "a⁠a"
+      ]
+   },
+   {
+      "text" : "a⁠̈a,",
+      "words" : [
+         "a⁠̈a"
+      ]
+   },
+   {
+      "text" : "a⁠1:",
+      "words" : [
+         "a⁠1"
+      ]
+   },
+   {
+      "text" : "a⁠̈1:",
+      "words" : [
+         "a⁠̈1"
+      ]
+   },
+   {
+      "text" : "a⁠1'",
+      "words" : [
+         "a⁠1"
+      ]
+   },
+   {
+      "text" : "a⁠̈1'",
+      "words" : [
+         "a⁠̈1"
+      ]
+   },
+   {
+      "text" : "a⁠1,",
+      "words" : [
+         "a⁠1"
+      ]
+   },
+   {
+      "text" : "a⁠̈1,",
+      "words" : [
+         "a⁠̈1"
+      ]
+   },
+   {
+      "text" : "a⁠1.⁠",
+      "words" : [
+         "a⁠1"
+      ]
+   },
+   {
+      "text" : "a⁠̈1.⁠",
+      "words" : [
+         "a⁠̈1"
+      ]
+   },
+   {
+      "text" : "a:\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a:̈〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a:A",
+      "words" : [
+         "a:A"
+      ]
+   },
+   {
+      "text" : "a:̈A",
+      "words" : [
+         "a:̈A"
+      ]
+   },
+   {
+      "text" : "a::",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a:̈0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a:_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a:̈_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a:­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:̈̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a:a⁠",
+      "words" : [
+         "a:a⁠"
+      ]
+   },
+   {
+      "text" : "a:̈a⁠",
+      "words" : [
+         "a:̈a⁠"
+      ]
+   },
+   {
+      "text" : "a:a:",
+      "words" : [
+         "a:a"
+      ]
+   },
+   {
+      "text" : "a:̈a:",
+      "words" : [
+         "a:̈a"
+      ]
+   },
+   {
+      "text" : "a:a'",
+      "words" : [
+         "a:a"
+      ]
+   },
+   {
+      "text" : "a:̈a'",
+      "words" : [
+         "a:̈a"
+      ]
+   },
+   {
+      "text" : "a:a'⁠",
+      "words" : [
+         "a:a"
+      ]
+   },
+   {
+      "text" : "a:̈a'⁠",
+      "words" : [
+         "a:̈a"
+      ]
+   },
+   {
+      "text" : "a:a,",
+      "words" : [
+         "a:a"
+      ]
+   },
+   {
+      "text" : "a:̈a,",
+      "words" : [
+         "a:̈a"
+      ]
+   },
+   {
+      "text" : "a:1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a:̈1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a:1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a:̈1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a:1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a:̈1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a:1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a:̈1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a'̈〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a'A",
+      "words" : [
+         "a'A"
+      ]
+   },
+   {
+      "text" : "a'̈A",
+      "words" : [
+         "a'̈A"
+      ]
+   },
+   {
+      "text" : "a':",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a',",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a''",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a'̈0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a'_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a'̈_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a'­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'̈̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'a⁠",
+      "words" : [
+         "a'a⁠"
+      ]
+   },
+   {
+      "text" : "a'̈a⁠",
+      "words" : [
+         "a'̈a⁠"
+      ]
+   },
+   {
+      "text" : "a'a:",
+      "words" : [
+         "a'a"
+      ]
+   },
+   {
+      "text" : "a'̈a:",
+      "words" : [
+         "a'̈a"
+      ]
+   },
+   {
+      "text" : "a'a'",
+      "words" : [
+         "a'a"
+      ]
+   },
+   {
+      "text" : "a'̈a'",
+      "words" : [
+         "a'̈a"
+      ]
+   },
+   {
+      "text" : "a'a'⁠",
+      "words" : [
+         "a'a"
+      ]
+   },
+   {
+      "text" : "a'̈a'⁠",
+      "words" : [
+         "a'̈a"
+      ]
+   },
+   {
+      "text" : "a'a,",
+      "words" : [
+         "a'a"
+      ]
+   },
+   {
+      "text" : "a'̈a,",
+      "words" : [
+         "a'̈a"
+      ]
+   },
+   {
+      "text" : "a'1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'̈1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'̈1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'̈1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'̈1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a'⁠̈〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a'⁠A",
+      "words" : [
+         "a'⁠A"
+      ]
+   },
+   {
+      "text" : "a'⁠̈A",
+      "words" : [
+         "a'⁠̈A"
+      ]
+   },
+   {
+      "text" : "a'⁠:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a'⁠̈0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a'⁠_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a'⁠̈_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a'⁠­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a'⁠a⁠",
+      "words" : [
+         "a'⁠a⁠"
+      ]
+   },
+   {
+      "text" : "a'⁠̈a⁠",
+      "words" : [
+         "a'⁠̈a⁠"
+      ]
+   },
+   {
+      "text" : "a'⁠a:",
+      "words" : [
+         "a'⁠a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈a:",
+      "words" : [
+         "a'⁠̈a"
+      ]
+   },
+   {
+      "text" : "a'⁠a'",
+      "words" : [
+         "a'⁠a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈a'",
+      "words" : [
+         "a'⁠̈a"
+      ]
+   },
+   {
+      "text" : "a'⁠a'⁠",
+      "words" : [
+         "a'⁠a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈a'⁠",
+      "words" : [
+         "a'⁠̈a"
+      ]
+   },
+   {
+      "text" : "a'⁠a,",
+      "words" : [
+         "a'⁠a"
+      ]
+   },
+   {
+      "text" : "a'⁠̈a,",
+      "words" : [
+         "a'⁠̈a"
+      ]
+   },
+   {
+      "text" : "a'⁠1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠̈1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠̈1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠̈1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a'⁠̈1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈\u0001",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈\r",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈\n",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈\u000b",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a,̈〱",
+      "words" : [
+         "a",
+         "〱"
+      ]
+   },
+   {
+      "text" : "a,A",
+      "words" : [
+         "a",
+         "A"
+      ]
+   },
+   {
+      "text" : "a,̈A",
+      "words" : [
+         "a",
+         "A"
+      ]
+   },
+   {
+      "text" : "a,:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈:",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈,",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈'",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a,̈0",
+      "words" : [
+         "a",
+         "0"
+      ]
+   },
+   {
+      "text" : "a,_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a,̈_",
+      "words" : [
+         "a",
+         "_"
+      ]
+   },
+   {
+      "text" : "a,­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈­",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈̀",
+      "words" : [
+         "a"
+      ]
+   },
+   {
+      "text" : "a,a⁠",
+      "words" : [
+         "a",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a,̈a⁠",
+      "words" : [
+         "a",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "a,a:",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈a:",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,a'",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈a'",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,a'⁠",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈a'⁠",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,a,",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,̈a,",
+      "words" : [
+         "a",
+         "a"
+      ]
+   },
+   {
+      "text" : "a,1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,̈1:",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,̈1'",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,̈1,",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "a,̈1.⁠",
+      "words" : [
+         "a",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1:̈〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1:A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1:̈A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1::",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:0",
+      "words" : [
+         "1",
+         "0"
+      ]
+   },
+   {
+      "text" : "1:̈0",
+      "words" : [
+         "1",
+         "0"
+      ]
+   },
+   {
+      "text" : "1:_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1:̈_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1:­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1:a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1:̈a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1:a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:̈a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:̈a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:̈a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:̈a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1:1:",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈1:",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:1'",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈1'",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:1,",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈1,",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:1.⁠",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1:̈1.⁠",
+      "words" : [
+         "1",
+         "1"
+      ]
+   },
+   {
+      "text" : "1'\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1'̈〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1'A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1'̈A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1':",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1',",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1''",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'0",
+      "words" : [
+         "1'0"
+      ]
+   },
+   {
+      "text" : "1'̈0",
+      "words" : [
+         "1'̈0"
+      ]
+   },
+   {
+      "text" : "1'_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1'̈_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1'­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'̈̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1'a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1'̈a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1'a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'̈a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'̈a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'̈a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'̈a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1'1:",
+      "words" : [
+         "1'1"
+      ]
+   },
+   {
+      "text" : "1'̈1:",
+      "words" : [
+         "1'̈1"
+      ]
+   },
+   {
+      "text" : "1'1'",
+      "words" : [
+         "1'1"
+      ]
+   },
+   {
+      "text" : "1'̈1'",
+      "words" : [
+         "1'̈1"
+      ]
+   },
+   {
+      "text" : "1'1,",
+      "words" : [
+         "1'1"
+      ]
+   },
+   {
+      "text" : "1'̈1,",
+      "words" : [
+         "1'̈1"
+      ]
+   },
+   {
+      "text" : "1'1.⁠",
+      "words" : [
+         "1'1"
+      ]
+   },
+   {
+      "text" : "1'̈1.⁠",
+      "words" : [
+         "1'̈1"
+      ]
+   },
+   {
+      "text" : "1,\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1,̈〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1,A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1,̈A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1,:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,0",
+      "words" : [
+         "1,0"
+      ]
+   },
+   {
+      "text" : "1,̈0",
+      "words" : [
+         "1,̈0"
+      ]
+   },
+   {
+      "text" : "1,_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1,̈_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1,­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,̈̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1,a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1,̈a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1,a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,̈a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,̈a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,̈a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,̈a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1,1:",
+      "words" : [
+         "1,1"
+      ]
+   },
+   {
+      "text" : "1,̈1:",
+      "words" : [
+         "1,̈1"
+      ]
+   },
+   {
+      "text" : "1,1'",
+      "words" : [
+         "1,1"
+      ]
+   },
+   {
+      "text" : "1,̈1'",
+      "words" : [
+         "1,̈1"
+      ]
+   },
+   {
+      "text" : "1,1,",
+      "words" : [
+         "1,1"
+      ]
+   },
+   {
+      "text" : "1,̈1,",
+      "words" : [
+         "1,̈1"
+      ]
+   },
+   {
+      "text" : "1,1.⁠",
+      "words" : [
+         "1,1"
+      ]
+   },
+   {
+      "text" : "1,̈1.⁠",
+      "words" : [
+         "1,̈1"
+      ]
+   },
+   {
+      "text" : "1.⁠\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈\u0001",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈\r",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈\n",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈\u000b",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1.⁠̈〱",
+      "words" : [
+         "1",
+         "〱"
+      ]
+   },
+   {
+      "text" : "1.⁠A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1.⁠̈A",
+      "words" : [
+         "1",
+         "A"
+      ]
+   },
+   {
+      "text" : "1.⁠:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈:",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈,",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈'",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠0",
+      "words" : [
+         "1.⁠0"
+      ]
+   },
+   {
+      "text" : "1.⁠̈0",
+      "words" : [
+         "1.⁠̈0"
+      ]
+   },
+   {
+      "text" : "1.⁠_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1.⁠̈_",
+      "words" : [
+         "1",
+         "_"
+      ]
+   },
+   {
+      "text" : "1.⁠­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈­",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈̀",
+      "words" : [
+         "1"
+      ]
+   },
+   {
+      "text" : "1.⁠a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1.⁠̈a⁠",
+      "words" : [
+         "1",
+         "a⁠"
+      ]
+   },
+   {
+      "text" : "1.⁠a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠̈a:",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠̈a'",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠̈a'⁠",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠̈a,",
+      "words" : [
+         "1",
+         "a"
+      ]
+   },
+   {
+      "text" : "1.⁠1:",
+      "words" : [
+         "1.⁠1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈1:",
+      "words" : [
+         "1.⁠̈1"
+      ]
+   },
+   {
+      "text" : "1.⁠1'",
+      "words" : [
+         "1.⁠1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈1'",
+      "words" : [
+         "1.⁠̈1"
+      ]
+   },
+   {
+      "text" : "1.⁠1,",
+      "words" : [
+         "1.⁠1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈1,",
+      "words" : [
+         "1.⁠̈1"
+      ]
+   },
+   {
+      "text" : "1.⁠1.⁠",
+      "words" : [
+         "1.⁠1"
+      ]
+   },
+   {
+      "text" : "1.⁠̈1.⁠",
+      "words" : [
+         "1.⁠̈1"
+      ]
+   },
+   {
+      "text" : "can't",
+      "words" : [
+         "can't"
+      ]
+   },
+   {
+      "text" : "can’t",
+      "words" : [
+         "can’t"
+      ]
+   },
+   {
+      "text" : "ab­by",
+      "words" : [
+         "ab­by"
+      ]
+   },
+   {
+      "text" : "a$-34,567.14%b",
+      "words" : [
+         "a",
+         "34,567.14",
+         "b"
+      ]
+   },
+   {
+      "text" : "3a",
+      "words" : [
+         "3a"
+      ]
+   },
+   {
+      "text" : "⁠c⁠a⁠n⁠'⁠t⁠⁠",
+      "words" : [
+         "c⁠a⁠n⁠'⁠t⁠⁠"
+      ]
+   },
+   {
+      "text" : "⁠c⁠a⁠n⁠’⁠t⁠⁠",
+      "words" : [
+         "c⁠a⁠n⁠’⁠t⁠⁠"
+      ]
+   },
+   {
+      "text" : "⁠a⁠b⁠­⁠b⁠y⁠⁠",
+      "words" : [
+         "a⁠b⁠­⁠b⁠y⁠⁠"
+      ]
+   },
+   {
+      "text" : "⁠a⁠$⁠-⁠3⁠4⁠,⁠5⁠6⁠7⁠.⁠1⁠4⁠%⁠b⁠⁠",
+      "words" : [
+         "a⁠",
+         "3⁠4⁠,⁠5⁠6⁠7⁠.⁠1⁠4⁠",
+         "b⁠⁠"
+      ]
+   },
+   {
+      "text" : "⁠3⁠a⁠⁠",
+      "words" : [
+         "3⁠a⁠⁠"
+      ]
+   }
+]

Modified: incubator/lucy/trunk/perl/MANIFEST
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/MANIFEST?rev=1213252&r1=1213251&r2=1213252&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/MANIFEST (original)
+++ incubator/lucy/trunk/perl/MANIFEST Mon Dec 12 14:19:17 2011
@@ -14,6 +14,7 @@ lib/Lucy/Analysis/PolyAnalyzer.pm
 lib/Lucy/Analysis/RegexTokenizer.pm
 lib/Lucy/Analysis/SnowballStemmer.pm
 lib/Lucy/Analysis/SnowballStopFilter.pm
+lib/Lucy/Analysis/StandardTokenizer.pm
 lib/Lucy/Analysis/Token.pm
 lib/Lucy/Docs/Cookbook.pod
 lib/Lucy/Docs/Cookbook/CustomQuery.pod
@@ -249,6 +250,7 @@ t/154-regex_tokenizer.t
 t/155-snowball_stop_filter.t
 t/156-snowball_stemmer.t
 t/157-normalizer.t
+t/158-standard_tokenizer.t
 t/200-doc.t
 t/201-hit_doc.t
 t/204-doc_reader.t
@@ -373,6 +375,7 @@ t/core/154-regex_tokenizer.t
 t/core/155-snowball_stop_filter.t
 t/core/156-snowball_stemmer.t
 t/core/157-normalizer.t
+t/core/158-standard_tokenizer.t
 t/core/206-snapshot.t
 t/core/216-schema.t
 t/core/220-doc_writer.t

Modified: incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm?rev=1213252&r1=1213251&r2=1213252&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm (original)
+++ incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm Mon Dec 12 14:19:17 2011
@@ -122,6 +122,7 @@ my $SNOWSTEM_SRC_DIR
 my $SNOWSTEM_INC_DIR = catdir( $SNOWSTEM_SRC_DIR, 'include' );
 my $SNOWSTOP_SRC_DIR
     = catdir( $base_dir, qw( modules analysis snowstop source ) );
+my $UCD_INC_DIR = catdir( $base_dir, qw( modules unicode ucd ) );
 my $UTF8PROC_SRC_DIR = catdir( $base_dir, qw( modules unicode utf8proc ) );
 my $UTF8PROC_C = catfile( $UTF8PROC_SRC_DIR, 'utf8proc.c' );
 my $CORE_SOURCE_DIR = catdir( $base_dir, 'core' );
@@ -510,7 +511,7 @@ sub ACTION_compile_custom_xs {
     mkpath( $archdir, 0, 0777 ) unless -d $archdir;
     my @include_dirs = (
         getcwd(), $CORE_SOURCE_DIR, $AUTOGEN_DIR, $XS_SOURCE_DIR,
-        $SNOWSTEM_INC_DIR, $UTF8PROC_SRC_DIR
+        $SNOWSTEM_INC_DIR, $UCD_INC_DIR, $UTF8PROC_SRC_DIR
     );
     my @objects;
 

Added: incubator/lucy/trunk/perl/lib/Lucy/Analysis/StandardTokenizer.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Analysis/StandardTokenizer.pm?rev=1213252&view=auto
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Analysis/StandardTokenizer.pm (added)
+++ incubator/lucy/trunk/perl/lib/Lucy/Analysis/StandardTokenizer.pm Mon Dec 12 14:19:17 2011
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package Lucy::Analysis::StandardTokenizer;
+use Lucy;
+
+1;
+
+__END__
+
+__BINDING__
+
+my $synopsis = <<'END_SYNOPSIS';
+    my $tokenizer = Lucy::Analysis::StandardTokenizer->new;
+
+    # Then... once you have a tokenizer, put it into a PolyAnalyzer:
+    my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
+        analyzers => [ $case_folder, $tokenizer, $stemmer ], );
+END_SYNOPSIS
+
+my $constructor = <<'END_CONSTRUCTOR';
+    my $tokenizer = Lucy::Analysis::StandardTokenizer->new;
+END_CONSTRUCTOR
+
+Clownfish::Binding::Perl::Class->register(
+    parcel            => "Lucy",
+    class_name        => "Lucy::Analysis::StandardTokenizer",
+    bind_constructors => ["new"],
+    make_pod          => {
+        constructor => { sample => $constructor },
+        synopsis    => $synopsis,
+    },
+);
+
+

Modified: incubator/lucy/trunk/perl/lib/Lucy/Test.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy/Test.pm?rev=1213252&r1=1213251&r2=1213252&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy/Test.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy/Test.pm Mon Dec 12 14:19:17 2011
@@ -78,6 +78,9 @@ PPCODE:
     else if (strEQ(package, "TestRegexTokenizer")) {
         lucy_TestRegexTokenizer_run_tests();
     }
+    else if (strEQ(package, "TestStandardTokenizer")) {
+        lucy_TestStandardTokenizer_run_tests();
+    }
     // Lucy::Object
     else if (strEQ(package, "TestObj")) {
         lucy_TestObj_run_tests();