You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/10/29 19:07:44 UTC

[20/32] lucene-solr:jira/solr-12730: Added an Arabic Snowball stemmer and test dataset

Added an Arabic Snowball stemmer and test dataset

This change adds an Arabic Snowball stemmer based on snowballstem.org,
as well as an Arabic test dataset in `TestSnowballVocabData.zip`.
It also updates the `ant patch-snowball` target to be compatible with
the Java classes generated by the latest Snowball version (tree:
1964ce688cbeca505263c8f77e16ed923296ce7a). The `ant patch-snowball` target
remains backward compatible with the version of the Snowball stemmers used in
Lucene 7.x and skips classes that have already been patched.

Signed-off-by: Jim Ferenczi <ji...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5c567d4f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5c567d4f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5c567d4f

Branch: refs/heads/jira/solr-12730
Commit: 5c567d4fbcab276a7b6ae4132c3c03af95a75df9
Parents: 6f291d4
Author: Ryadh Dahimene <da...@gmail.com>
Authored: Thu Sep 13 13:26:34 2018 +0100
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Fri Oct 26 10:53:45 2018 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |    4 +
 lucene/analysis/common/build.xml                |  108 +-
 .../tartarus/snowball/ext/ArabicStemmer.java    | 1912 ++++++++++++++++++
 .../analysis/snowball/TestSnowballVocab.java    |    1 +
 .../analysis/snowball/TestSnowballVocabData.zip |  Bin 3128133 -> 3568843 bytes
 5 files changed, 2000 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c567d4f/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 35dc4ec..9b0382b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -226,6 +226,10 @@ New Features
 * LUCENE-8538: Add a Simple WKT Shape Parser for creating Lucene Geometries (Polygon, Line,
   Rectangle) from WKT format. (Nick Knize)
 
+* LUCENE-8462: Adds an Arabic snowball stemmer based on
+  https://github.com/snowballstem/snowball/blob/master/algorithms/arabic.sbl 
+  (Ryadh Dahimene via Jim Ferenczi)
+
 Improvements:
 
 * LUCENE-8521: Change LatLonShape encoding to 7 dimensions instead of 6; where the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c567d4f/lucene/analysis/common/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/build.xml b/lucene/analysis/common/build.xml
index 6e4f118..2064e19 100644
--- a/lucene/analysis/common/build.xml
+++ b/lucene/analysis/common/build.xml
@@ -7,9 +7,9 @@
     The ASF licenses this file to You under the Apache License, Version 2.0
     the "License"); you may not use this file except in compliance with
     the License.  You may obtain a copy of the License at
- 
+
         http://www.apache.org/licenses/LICENSE-2.0
- 
+
     Unless required by applicable law or agreed to in writing, software
     distributed under the License is distributed on an "AS IS" BASIS,
     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,10 +17,10 @@
     limitations under the License.
  -->
 
-<project name="analyzers-common" default="default">
+<project name="analyzers-common" default="default" xmlns:rsel="antlib:org.apache.tools.ant.types.resources.selectors">
 
   <description>
-   Analyzers for indexing content in different languages and domains.
+    Analyzers for indexing content in different languages and domains.
   </description>
 
   <!-- some files for testing that do not have license headers -->
@@ -28,9 +28,9 @@
   <property name="rat.additional-includes" value="src/tools/**"/>
 
   <import file="../analysis-module-build.xml"/>
-  
-  <property name="snowball.programs.dir" location="src/java/org/tartarus/snowball/ext"/>  
-  
+
+  <property name="snowball.programs.dir" location="src/java/org/tartarus/snowball/ext"/>
+
   <property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
 
   <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-ClassicAnalyzer,-jflex-UAX29URLEmailTokenizer,
@@ -66,7 +66,7 @@
     <run-jflex-and-disable-buffer-expansion
         dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
   </target>
-  
+
   <target name="-jflex-ClassicAnalyzer" depends="init,-install-jflex">
     <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
   </target>
@@ -84,28 +84,28 @@
       </fileset>
     </delete>
   </target>
-  
+
   <target xmlns:ivy="antlib:org.apache.ivy.ant" name="-resolve-icu4j" unless="icu4j.resolved" depends="ivy-availability-check,ivy-configure">
     <loadproperties prefix="ivyversions" srcFile="${common.dir}/ivy-versions.properties"/>
     <ivy:cachepath organisation="com.ibm.icu" module="icu4j" revision="${ivyversions./com.ibm.icu/icu4j}"
-      inline="true" conf="default" transitive="true" pathid="icu4j.classpath"/>
+                   inline="true" conf="default" transitive="true" pathid="icu4j.classpath"/>
     <property name="icu4j.resolved" value="true"/>
   </target>
-  
+
   <target name="unicode-data" depends="-resolve-icu4j,resolve-groovy">
     <groovy classpathref="icu4j.classpath" src="src/tools/groovy/generate-unicode-data.groovy"/>
     <fixcrlf file="${unicode-props-file}" encoding="UTF-8"/>
   </target>
-  
+
   <property name="tld.zones" value="http://www.internic.net/zones/root.zone"/>
   <property name="tld.output" location="src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro"/>
 
   <target name="gen-tlds" depends="compile-tools">
     <java
-      classname="org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"
-      dir="."
-      fork="true"
-      failonerror="true">
+        classname="org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"
+        dir="."
+        fork="true"
+        failonerror="true">
       <classpath>
         <pathelement location="${build.dir}/classes/tools"/>
       </classpath>
@@ -117,8 +117,8 @@
 
   <target name="compile-tools" depends="common.compile-tools">
     <compile
-      srcdir="src/tools/java"
-      destdir="${build.dir}/classes/tools">
+        srcdir="src/tools/java"
+        destdir="${build.dir}/classes/tools">
       <classpath refid="classpath"/>
     </compile>
   </target>
@@ -126,15 +126,73 @@
   <target name="javadocs" depends="module-build.javadocs"/>
 
   <target name="regenerate" depends="jflex,unicode-data"/>
-  
+
   <target name="patch-snowball" description="Patches all snowball programs in '${snowball.programs.dir}' to make them work with MethodHandles">
-      <fileset id="snowball.programs" dir="${snowball.programs.dir}" includes="*Stemmer.java"/>
-      <replaceregexp match="^public class \w+Stemmer\b" replace="@SuppressWarnings(&quot;unused&quot;) \0" flags="m" encoding="UTF-8">
+    <fileset id="snowball.programs" dir="${snowball.programs.dir}" includes="*Stemmer.java"/>
+
+    <replaceregexp match="^public class \w+Stemmer\b" replace="@SuppressWarnings(&quot;unused&quot;) \0" flags="m" encoding="UTF-8">
+      <restrict>
+        <fileset refid="snowball.programs"/>
+        <rsel:not>
+          <rsel:contains text="patched"/>
+        </rsel:not>
+      </restrict>
+    </replaceregexp>
+
+    <replaceregexp match="new Among\(([^,]*,[^,]*,[^,]*?)(?=\))" replace="\0, &quot;&quot;, methodObject" flags="g" encoding="UTF-8">
+      <restrict>
+        <fileset refid="snowball.programs"/>
+        <rsel:not>
+          <rsel:contains text="patched"/>
+        </rsel:not>
+      </restrict>
+    </replaceregexp>
+
+    <replaceregexp match="(new Among\([^,]*,[^,]*,[^,]*,[^,]*,)[^,]*?(?=\))" replace="\1 methodObject" flags="g" encoding="UTF-8">
+      <restrict>
         <fileset refid="snowball.programs"/>
-      </replaceregexp>
-      <replaceregexp match="private final static \w+Stemmer methodObject\b.*$" replace="/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();" flags="m" encoding="UTF-8">
+        <rsel:not>
+          <rsel:contains text="patched"/>
+        </rsel:not>
+      </restrict>
+    </replaceregexp>
+
+    <replaceregexp match="(?:find_among(?:|_b)\()(.*?)(?=\))" replace="\0, \1.length" flags="g" encoding="UTF-8">
+      <restrict>
+        <fileset refid="snowball.programs"/>
+        <rsel:not>
+          <rsel:contains text="patched"/>
+        </rsel:not>
+      </restrict>
+    </replaceregexp>
+
+    <replaceregexp match="current" replace="getCurrent()" flags="g" encoding="UTF-8">
+      <restrict>
         <fileset refid="snowball.programs"/>
-      </replaceregexp>
-      <fixcrlf srcdir="${snowball.programs.dir}" includes="*Stemmer.java" tab="remove" tablength="2" encoding="UTF-8" javafiles="yes" fixlast="yes"/>
+        <rsel:not>
+          <rsel:contains text="patched"/>
+        </rsel:not>
+      </restrict>
+    </replaceregexp>
+
+    <replaceregexp match="(?:eq_s(?:|_b)\()(.*?)(?=\))" replace="\0.length(),\1" flags="g" encoding="UTF-8">
+      <restrict>
+        <fileset refid="snowball.programs"/>
+        <rsel:not>
+          <rsel:contains text="patched"/>
+        </rsel:not>
+      </restrict>
+    </replaceregexp>
+
+    <replaceregexp match="private static final long serialVersionUID(.*)" replace="private static final long serialVersionUID = 1L; ${line.separator}${line.separator} /* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();" flags="m" encoding="UTF-8">
+      <restrict>
+        <fileset refid="snowball.programs"/>
+        <rsel:not>
+          <rsel:contains text="patched"/>
+        </rsel:not>
+      </restrict>
+    </replaceregexp>
+
+    <fixcrlf srcdir="${snowball.programs.dir}" includes="*Stemmer.java" tab="remove" tablength="2" encoding="UTF-8" javafiles="yes" fixlast="yes"/>
   </target>
 </project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c567d4f/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArabicStemmer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArabicStemmer.java b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArabicStemmer.java
new file mode 100644
index 0000000..2b907b2
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArabicStemmer.java
@@ -0,0 +1,1912 @@
+// This file was generated automatically by the Snowball to Java compiler
+// http://snowballstem.org/
+
+package org.tartarus.snowball.ext;
+
+import org.tartarus.snowball.Among;
+
+/**
+ * This class was automatically generated by a Snowball to Java compiler
+ * It implements the stemming algorithm defined by a snowball script.
+ */
+
+@SuppressWarnings("unused") public class ArabicStemmer extends org.tartarus.snowball.SnowballProgram {
+
+    private static final long serialVersionUID = 1L; 
+
+ /* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();
+
+    private final static Among a_0[] = {
+        new Among("\u0640", -1, 2, "", methodObject),
+        new Among("\u064B", -1, 1, "", methodObject),
+        new Among("\u064C", -1, 1, "", methodObject),
+        new Among("\u064D", -1, 1, "", methodObject),
+        new Among("\u064E", -1, 1, "", methodObject),
+        new Among("\u064F", -1, 1, "", methodObject),
+        new Among("\u0650", -1, 1, "", methodObject),
+        new Among("\u0651", -1, 1, "", methodObject),
+        new Among("\u0652", -1, 1, "", methodObject),
+        new Among("\u0660", -1, 3, "", methodObject),
+        new Among("\u0661", -1, 4, "", methodObject),
+        new Among("\u0662", -1, 5, "", methodObject),
+        new Among("\u0663", -1, 6, "", methodObject),
+        new Among("\u0664", -1, 7, "", methodObject),
+        new Among("\u0665", -1, 8, "", methodObject),
+        new Among("\u0666", -1, 9, "", methodObject),
+        new Among("\u0667", -1, 10, "", methodObject),
+        new Among("\u0668", -1, 11, "", methodObject),
+        new Among("\u0669", -1, 12, "", methodObject),
+        new Among("\uFE80", -1, 13, "", methodObject),
+        new Among("\uFE81", -1, 17, "", methodObject),
+        new Among("\uFE82", -1, 17, "", methodObject),
+        new Among("\uFE83", -1, 14, "", methodObject),
+        new Among("\uFE84", -1, 14, "", methodObject),
+        new Among("\uFE85", -1, 18, "", methodObject),
+        new Among("\uFE86", -1, 18, "", methodObject),
+        new Among("\uFE87", -1, 15, "", methodObject),
+        new Among("\uFE88", -1, 15, "", methodObject),
+        new Among("\uFE89", -1, 16, "", methodObject),
+        new Among("\uFE8A", -1, 16, "", methodObject),
+        new Among("\uFE8B", -1, 16, "", methodObject),
+        new Among("\uFE8C", -1, 16, "", methodObject),
+        new Among("\uFE8D", -1, 19, "", methodObject),
+        new Among("\uFE8E", -1, 19, "", methodObject),
+        new Among("\uFE8F", -1, 20, "", methodObject),
+        new Among("\uFE90", -1, 20, "", methodObject),
+        new Among("\uFE91", -1, 20, "", methodObject),
+        new Among("\uFE92", -1, 20, "", methodObject),
+        new Among("\uFE93", -1, 21, "", methodObject),
+        new Among("\uFE94", -1, 21, "", methodObject),
+        new Among("\uFE95", -1, 22, "", methodObject),
+        new Among("\uFE96", -1, 22, "", methodObject),
+        new Among("\uFE97", -1, 22, "", methodObject),
+        new Among("\uFE98", -1, 22, "", methodObject),
+        new Among("\uFE99", -1, 23, "", methodObject),
+        new Among("\uFE9A", -1, 23, "", methodObject),
+        new Among("\uFE9B", -1, 23, "", methodObject),
+        new Among("\uFE9C", -1, 23, "", methodObject),
+        new Among("\uFE9D", -1, 24, "", methodObject),
+        new Among("\uFE9E", -1, 24, "", methodObject),
+        new Among("\uFE9F", -1, 24, "", methodObject),
+        new Among("\uFEA0", -1, 24, "", methodObject),
+        new Among("\uFEA1", -1, 25, "", methodObject),
+        new Among("\uFEA2", -1, 25, "", methodObject),
+        new Among("\uFEA3", -1, 25, "", methodObject),
+        new Among("\uFEA4", -1, 25, "", methodObject),
+        new Among("\uFEA5", -1, 26, "", methodObject),
+        new Among("\uFEA6", -1, 26, "", methodObject),
+        new Among("\uFEA7", -1, 26, "", methodObject),
+        new Among("\uFEA8", -1, 26, "", methodObject),
+        new Among("\uFEA9", -1, 27, "", methodObject),
+        new Among("\uFEAA", -1, 27, "", methodObject),
+        new Among("\uFEAB", -1, 28, "", methodObject),
+        new Among("\uFEAC", -1, 28, "", methodObject),
+        new Among("\uFEAD", -1, 29, "", methodObject),
+        new Among("\uFEAE", -1, 29, "", methodObject),
+        new Among("\uFEAF", -1, 30, "", methodObject),
+        new Among("\uFEB0", -1, 30, "", methodObject),
+        new Among("\uFEB1", -1, 31, "", methodObject),
+        new Among("\uFEB2", -1, 31, "", methodObject),
+        new Among("\uFEB3", -1, 31, "", methodObject),
+        new Among("\uFEB4", -1, 31, "", methodObject),
+        new Among("\uFEB5", -1, 32, "", methodObject),
+        new Among("\uFEB6", -1, 32, "", methodObject),
+        new Among("\uFEB7", -1, 32, "", methodObject),
+        new Among("\uFEB8", -1, 32, "", methodObject),
+        new Among("\uFEB9", -1, 33, "", methodObject),
+        new Among("\uFEBA", -1, 33, "", methodObject),
+        new Among("\uFEBB", -1, 33, "", methodObject),
+        new Among("\uFEBC", -1, 33, "", methodObject),
+        new Among("\uFEBD", -1, 34, "", methodObject),
+        new Among("\uFEBE", -1, 34, "", methodObject),
+        new Among("\uFEBF", -1, 34, "", methodObject),
+        new Among("\uFEC0", -1, 34, "", methodObject),
+        new Among("\uFEC1", -1, 35, "", methodObject),
+        new Among("\uFEC2", -1, 35, "", methodObject),
+        new Among("\uFEC3", -1, 35, "", methodObject),
+        new Among("\uFEC4", -1, 35, "", methodObject),
+        new Among("\uFEC5", -1, 36, "", methodObject),
+        new Among("\uFEC6", -1, 36, "", methodObject),
+        new Among("\uFEC7", -1, 36, "", methodObject),
+        new Among("\uFEC8", -1, 36, "", methodObject),
+        new Among("\uFEC9", -1, 37, "", methodObject),
+        new Among("\uFECA", -1, 37, "", methodObject),
+        new Among("\uFECB", -1, 37, "", methodObject),
+        new Among("\uFECC", -1, 37, "", methodObject),
+        new Among("\uFECD", -1, 38, "", methodObject),
+        new Among("\uFECE", -1, 38, "", methodObject),
+        new Among("\uFECF", -1, 38, "", methodObject),
+        new Among("\uFED0", -1, 38, "", methodObject),
+        new Among("\uFED1", -1, 39, "", methodObject),
+        new Among("\uFED2", -1, 39, "", methodObject),
+        new Among("\uFED3", -1, 39, "", methodObject),
+        new Among("\uFED4", -1, 39, "", methodObject),
+        new Among("\uFED5", -1, 40, "", methodObject),
+        new Among("\uFED6", -1, 40, "", methodObject),
+        new Among("\uFED7", -1, 40, "", methodObject),
+        new Among("\uFED8", -1, 40, "", methodObject),
+        new Among("\uFED9", -1, 41, "", methodObject),
+        new Among("\uFEDA", -1, 41, "", methodObject),
+        new Among("\uFEDB", -1, 41, "", methodObject),
+        new Among("\uFEDC", -1, 41, "", methodObject),
+        new Among("\uFEDD", -1, 42, "", methodObject),
+        new Among("\uFEDE", -1, 42, "", methodObject),
+        new Among("\uFEDF", -1, 42, "", methodObject),
+        new Among("\uFEE0", -1, 42, "", methodObject),
+        new Among("\uFEE1", -1, 43, "", methodObject),
+        new Among("\uFEE2", -1, 43, "", methodObject),
+        new Among("\uFEE3", -1, 43, "", methodObject),
+        new Among("\uFEE4", -1, 43, "", methodObject),
+        new Among("\uFEE5", -1, 44, "", methodObject),
+        new Among("\uFEE6", -1, 44, "", methodObject),
+        new Among("\uFEE7", -1, 44, "", methodObject),
+        new Among("\uFEE8", -1, 44, "", methodObject),
+        new Among("\uFEE9", -1, 45, "", methodObject),
+        new Among("\uFEEA", -1, 45, "", methodObject),
+        new Among("\uFEEB", -1, 45, "", methodObject),
+        new Among("\uFEEC", -1, 45, "", methodObject),
+        new Among("\uFEED", -1, 46, "", methodObject),
+        new Among("\uFEEE", -1, 46, "", methodObject),
+        new Among("\uFEEF", -1, 47, "", methodObject),
+        new Among("\uFEF0", -1, 47, "", methodObject),
+        new Among("\uFEF1", -1, 48, "", methodObject),
+        new Among("\uFEF2", -1, 48, "", methodObject),
+        new Among("\uFEF3", -1, 48, "", methodObject),
+        new Among("\uFEF4", -1, 48, "", methodObject),
+        new Among("\uFEF5", -1, 52, "", methodObject),
+        new Among("\uFEF6", -1, 52, "", methodObject),
+        new Among("\uFEF7", -1, 50, "", methodObject),
+        new Among("\uFEF8", -1, 50, "", methodObject),
+        new Among("\uFEF9", -1, 51, "", methodObject),
+        new Among("\uFEFA", -1, 51, "", methodObject),
+        new Among("\uFEFB", -1, 49, "", methodObject),
+        new Among("\uFEFC", -1, 49, "", methodObject)
+    };
+
+    private final static Among a_1[] = {
+        new Among("\u0622", -1, 1, "", methodObject),
+        new Among("\u0623", -1, 1, "", methodObject),
+        new Among("\u0624", -1, 2, "", methodObject),
+        new Among("\u0625", -1, 1, "", methodObject),
+        new Among("\u0626", -1, 3, "", methodObject)
+    };
+
+    private final static Among a_2[] = {
+        new Among("\u0622", -1, 1, "", methodObject),
+        new Among("\u0623", -1, 1, "", methodObject),
+        new Among("\u0624", -1, 2, "", methodObject),
+        new Among("\u0625", -1, 1, "", methodObject),
+        new Among("\u0626", -1, 3, "", methodObject)
+    };
+
+    private final static Among a_3[] = {
+        new Among("\u0627\u0644", -1, 2, "", methodObject),
+        new Among("\u0628\u0627\u0644", -1, 1, "", methodObject),
+        new Among("\u0643\u0627\u0644", -1, 1, "", methodObject),
+        new Among("\u0644\u0644", -1, 2, "", methodObject)
+    };
+
+    private final static Among a_4[] = {
+        new Among("\u0623\u0622", -1, 2, "", methodObject),
+        new Among("\u0623\u0623", -1, 1, "", methodObject),
+        new Among("\u0623\u0624", -1, 3, "", methodObject),
+        new Among("\u0623\u0625", -1, 5, "", methodObject),
+        new Among("\u0623\u0627", -1, 4, "", methodObject)
+    };
+
+    private final static Among a_5[] = {
+        new Among("\u0641", -1, 1, "", methodObject),
+        new Among("\u0648", -1, 2, "", methodObject)
+    };
+
+    private final static Among a_6[] = {
+        new Among("\u0627\u0644", -1, 2, "", methodObject),
+        new Among("\u0628\u0627\u0644", -1, 1, "", methodObject),
+        new Among("\u0643\u0627\u0644", -1, 1, "", methodObject),
+        new Among("\u0644\u0644", -1, 2, "", methodObject)
+    };
+
+    private final static Among a_7[] = {
+        new Among("\u0628", -1, 1, "", methodObject),
+        new Among("\u0628\u0628", 0, 2, "", methodObject),
+        new Among("\u0643\u0643", -1, 3, "", methodObject)
+    };
+
+    private final static Among a_8[] = {
+        new Among("\u0633\u0623", -1, 4, "", methodObject),
+        new Among("\u0633\u062A", -1, 2, "", methodObject),
+        new Among("\u0633\u0646", -1, 3, "", methodObject),
+        new Among("\u0633\u064A", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_9[] = {
+        new Among("\u062A\u0633\u062A", -1, 1, "", methodObject),
+        new Among("\u0646\u0633\u062A", -1, 1, "", methodObject),
+        new Among("\u064A\u0633\u062A", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_10[] = {
+        new Among("\u0643\u0645\u0627", -1, 3, "", methodObject),
+        new Among("\u0647\u0645\u0627", -1, 3, "", methodObject),
+        new Among("\u0646\u0627", -1, 2, "", methodObject),
+        new Among("\u0647\u0627", -1, 2, "", methodObject),
+        new Among("\u0643", -1, 1, "", methodObject),
+        new Among("\u0643\u0645", -1, 2, "", methodObject),
+        new Among("\u0647\u0645", -1, 2, "", methodObject),
+        new Among("\u0647\u0646", -1, 2, "", methodObject),
+        new Among("\u0647", -1, 1, "", methodObject),
+        new Among("\u064A", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_11[] = {
+        new Among("\u0646", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_12[] = {
+        new Among("\u0627", -1, 1, "", methodObject),
+        new Among("\u0648", -1, 1, "", methodObject),
+        new Among("\u064A", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_13[] = {
+        new Among("\u0627\u062A", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_14[] = {
+        new Among("\u062A", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_15[] = {
+        new Among("\u0629", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_16[] = {
+        new Among("\u064A", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_17[] = {
+        new Among("\u0643\u0645\u0627", -1, 3, "", methodObject),
+        new Among("\u0647\u0645\u0627", -1, 3, "", methodObject),
+        new Among("\u0646\u0627", -1, 2, "", methodObject),
+        new Among("\u0647\u0627", -1, 2, "", methodObject),
+        new Among("\u0643", -1, 1, "", methodObject),
+        new Among("\u0643\u0645", -1, 2, "", methodObject),
+        new Among("\u0647\u0645", -1, 2, "", methodObject),
+        new Among("\u0643\u0646", -1, 2, "", methodObject),
+        new Among("\u0647\u0646", -1, 2, "", methodObject),
+        new Among("\u0647", -1, 1, "", methodObject),
+        new Among("\u0643\u0645\u0648", -1, 3, "", methodObject),
+        new Among("\u0646\u064A", -1, 2, "", methodObject)
+    };
+
+    private final static Among a_18[] = {
+        new Among("\u0627", -1, 2, "", methodObject),
+        new Among("\u062A\u0627", 0, 3, "", methodObject),
+        new Among("\u062A\u0645\u0627", 0, 5, "", methodObject),
+        new Among("\u0646\u0627", 0, 3, "", methodObject),
+        new Among("\u062A", -1, 1, "", methodObject),
+        new Among("\u0646", -1, 2, "", methodObject),
+        new Among("\u0627\u0646", 5, 4, "", methodObject),
+        new Among("\u062A\u0646", 5, 3, "", methodObject),
+        new Among("\u0648\u0646", 5, 4, "", methodObject),
+        new Among("\u064A\u0646", 5, 4, "", methodObject),
+        new Among("\u064A", -1, 2, "", methodObject)
+    };
+
+    private final static Among a_19[] = {
+        new Among("\u0648\u0627", -1, 1, "", methodObject),
+        new Among("\u062A\u0645", -1, 1, "", methodObject)
+    };
+
+    private final static Among a_20[] = {
+        new Among("\u0648", -1, 1, "", methodObject),
+        new Among("\u062A\u0645\u0648", 0, 2, "", methodObject)
+    };
+
+    private final static Among a_21[] = {
+        new Among("\u0649", -1, 1, "", methodObject)
+    };
+
+    private boolean B_is_defined;
+    private boolean B_is_verb;
+    private boolean B_is_noun;
+    private int I_word_len;
+
+
+    private boolean r_Normalize_pre() {
+        int among_var;
+        // (, line 251
+        // loop, line 252
+        for (int v_1 = getCurrent().length(); v_1 > 0; v_1--)
+        {
+            // (, line 252
+            // or, line 316
+            lab0: do {
+                int v_2 = cursor;
+                lab1: do {
+                    // (, line 253
+                    // [, line 254
+                    bra = cursor;
+                    // substring, line 254
+                    among_var = find_among(a_0, a_0.length);
+                    if (among_var == 0)
+                    {
+                        break lab1;
+                    }
+                    // ], line 254
+                    ket = cursor;
+                    switch (among_var) {
+                        case 0:
+                            break lab1;
+                        case 1:
+                            // (, line 255
+                            // delete, line 255
+                            slice_del();
+                            break;
+                        case 2:
+                            // (, line 256
+                            // delete, line 256
+                            slice_del();
+                            break;
+                        case 3:
+                            // (, line 259
+                            // <-, line 259
+                            slice_from("0");
+                            break;
+                        case 4:
+                            // (, line 260
+                            // <-, line 260
+                            slice_from("1");
+                            break;
+                        case 5:
+                            // (, line 261
+                            // <-, line 261
+                            slice_from("2");
+                            break;
+                        case 6:
+                            // (, line 262
+                            // <-, line 262
+                            slice_from("3");
+                            break;
+                        case 7:
+                            // (, line 263
+                            // <-, line 263
+                            slice_from("4");
+                            break;
+                        case 8:
+                            // (, line 264
+                            // <-, line 264
+                            slice_from("5");
+                            break;
+                        case 9:
+                            // (, line 265
+                            // <-, line 265
+                            slice_from("6");
+                            break;
+                        case 10:
+                            // (, line 266
+                            // <-, line 266
+                            slice_from("7");
+                            break;
+                        case 11:
+                            // (, line 267
+                            // <-, line 267
+                            slice_from("8");
+                            break;
+                        case 12:
+                            // (, line 268
+                            // <-, line 268
+                            slice_from("9");
+                            break;
+                        case 13:
+                            // (, line 271
+                            // <-, line 271
+                            slice_from("\u0621");
+                            break;
+                        case 14:
+                            // (, line 272
+                            // <-, line 272
+                            slice_from("\u0623");
+                            break;
+                        case 15:
+                            // (, line 273
+                            // <-, line 273
+                            slice_from("\u0625");
+                            break;
+                        case 16:
+                            // (, line 274
+                            // <-, line 274
+                            slice_from("\u0626");
+                            break;
+                        case 17:
+                            // (, line 275
+                            // <-, line 275
+                            slice_from("\u0622");
+                            break;
+                        case 18:
+                            // (, line 276
+                            // <-, line 276
+                            slice_from("\u0624");
+                            break;
+                        case 19:
+                            // (, line 277
+                            // <-, line 277
+                            slice_from("\u0627");
+                            break;
+                        case 20:
+                            // (, line 278
+                            // <-, line 278
+                            slice_from("\u0628");
+                            break;
+                        case 21:
+                            // (, line 279
+                            // <-, line 279
+                            slice_from("\u0629");
+                            break;
+                        case 22:
+                            // (, line 280
+                            // <-, line 280
+                            slice_from("\u062A");
+                            break;
+                        case 23:
+                            // (, line 281
+                            // <-, line 281
+                            slice_from("\u062B");
+                            break;
+                        case 24:
+                            // (, line 282
+                            // <-, line 282
+                            slice_from("\u062C");
+                            break;
+                        case 25:
+                            // (, line 283
+                            // <-, line 283
+                            slice_from("\u062D");
+                            break;
+                        case 26:
+                            // (, line 284
+                            // <-, line 284
+                            slice_from("\u062E");
+                            break;
+                        case 27:
+                            // (, line 285
+                            // <-, line 285
+                            slice_from("\u062F");
+                            break;
+                        case 28:
+                            // (, line 286
+                            // <-, line 286
+                            slice_from("\u0630");
+                            break;
+                        case 29:
+                            // (, line 287
+                            // <-, line 287
+                            slice_from("\u0631");
+                            break;
+                        case 30:
+                            // (, line 288
+                            // <-, line 288
+                            slice_from("\u0632");
+                            break;
+                        case 31:
+                            // (, line 289
+                            // <-, line 289
+                            slice_from("\u0633");
+                            break;
+                        case 32:
+                            // (, line 290
+                            // <-, line 290
+                            slice_from("\u0634");
+                            break;
+                        case 33:
+                            // (, line 291
+                            // <-, line 291
+                            slice_from("\u0635");
+                            break;
+                        case 34:
+                            // (, line 292
+                            // <-, line 292
+                            slice_from("\u0636");
+                            break;
+                        case 35:
+                            // (, line 293
+                            // <-, line 293
+                            slice_from("\u0637");
+                            break;
+                        case 36:
+                            // (, line 294
+                            // <-, line 294
+                            slice_from("\u0638");
+                            break;
+                        case 37:
+                            // (, line 295
+                            // <-, line 295
+                            slice_from("\u0639");
+                            break;
+                        case 38:
+                            // (, line 296
+                            // <-, line 296
+                            slice_from("\u063A");
+                            break;
+                        case 39:
+                            // (, line 297
+                            // <-, line 297
+                            slice_from("\u0641");
+                            break;
+                        case 40:
+                            // (, line 298
+                            // <-, line 298
+                            slice_from("\u0642");
+                            break;
+                        case 41:
+                            // (, line 299
+                            // <-, line 299
+                            slice_from("\u0643");
+                            break;
+                        case 42:
+                            // (, line 300
+                            // <-, line 300
+                            slice_from("\u0644");
+                            break;
+                        case 43:
+                            // (, line 301
+                            // <-, line 301
+                            slice_from("\u0645");
+                            break;
+                        case 44:
+                            // (, line 302
+                            // <-, line 302
+                            slice_from("\u0646");
+                            break;
+                        case 45:
+                            // (, line 303
+                            // <-, line 303
+                            slice_from("\u0647");
+                            break;
+                        case 46:
+                            // (, line 304
+                            // <-, line 304
+                            slice_from("\u0648");
+                            break;
+                        case 47:
+                            // (, line 305
+                            // <-, line 305
+                            slice_from("\u0649");
+                            break;
+                        case 48:
+                            // (, line 306
+                            // <-, line 306
+                            slice_from("\u064A");
+                            break;
+                        case 49:
+                            // (, line 309
+                            // <-, line 309
+                            slice_from("\u0644\u0627");
+                            break;
+                        case 50:
+                            // (, line 310
+                            // <-, line 310
+                            slice_from("\u0644\u0623");
+                            break;
+                        case 51:
+                            // (, line 311
+                            // <-, line 311
+                            slice_from("\u0644\u0625");
+                            break;
+                        case 52:
+                            // (, line 312
+                            // <-, line 312
+                            slice_from("\u0644\u0622");
+                            break;
+                    }
+                    break lab0;
+                } while (false);
+                cursor = v_2;
+                // next, line 317
+                if (cursor >= limit)
+                {
+                    return false;
+                }
+                cursor++;
+            } while (false);
+        }
+        return true;
+    }
+
+    private boolean r_Normalize_post() {
+        int among_var;
+        // (, line 321
+        // do, line 323
+        int v_1 = cursor;
+        lab0: do {
+            // (, line 323
+            // backwards, line 325
+            limit_backward = cursor;
+            cursor = limit;
+            // (, line 325
+            // [, line 326
+            ket = cursor;
+            // substring, line 326
+            among_var = find_among_b(a_1, a_1.length);
+            if (among_var == 0)
+            {
+                break lab0;
+            }
+            // ], line 326
+            bra = cursor;
+            switch (among_var) {
+                case 0:
+                    break lab0;
+                case 1:
+                    // (, line 327
+                    // <-, line 327
+                    slice_from("\u0621");
+                    break;
+                case 2:
+                    // (, line 328
+                    // <-, line 328
+                    slice_from("\u0621");
+                    break;
+                case 3:
+                    // (, line 329
+                    // <-, line 329
+                    slice_from("\u0621");
+                    break;
+            }
+            cursor = limit_backward;
+        } while (false);
+        cursor = v_1;
+        // do, line 334
+        int v_2 = cursor;
+        lab1: do {
+            // loop, line 334
+            for (int v_3 = I_word_len; v_3 > 0; v_3--)
+            {
+                // (, line 334
+                // or, line 343
+                lab2: do {
+                    int v_4 = cursor;
+                    lab3: do {
+                        // (, line 335
+                        // [, line 337
+                        bra = cursor;
+                        // substring, line 337
+                        among_var = find_among(a_2, a_2.length);
+                        if (among_var == 0)
+                        {
+                            break lab3;
+                        }
+                        // ], line 337
+                        ket = cursor;
+                        switch (among_var) {
+                            case 0:
+                                break lab3;
+                            case 1:
+                                // (, line 338
+                                // <-, line 338
+                                slice_from("\u0627");
+                                break;
+                            case 2:
+                                // (, line 339
+                                // <-, line 339
+                                slice_from("\u0648");
+                                break;
+                            case 3:
+                                // (, line 340
+                                // <-, line 340
+                                slice_from("\u064A");
+                                break;
+                        }
+                        break lab2;
+                    } while (false);
+                    cursor = v_4;
+                    // next, line 344
+                    if (cursor >= limit)
+                    {
+                        break lab1;
+                    }
+                    cursor++;
+                } while (false);
+            }
+        } while (false);
+        cursor = v_2;
+        return true;
+    }
+
+    private boolean r_Checks1() {
+        int among_var;
+        // (, line 349
+        I_word_len = getCurrent().length();
+        // [, line 351
+        bra = cursor;
+        // substring, line 351
+        among_var = find_among(a_3, a_3.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 351
+        ket = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 352
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // set is_noun, line 352
+                B_is_noun = true;
+                // unset is_verb, line 352
+                B_is_verb = false;
+                // set is_defined, line 352
+                B_is_defined = true;
+                break;
+            case 2:
+                // (, line 353
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // set is_noun, line 353
+                B_is_noun = true;
+                // unset is_verb, line 353
+                B_is_verb = false;
+                // set is_defined, line 353
+                B_is_defined = true;
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Prefix_Step1() {
+        int among_var;
+        // (, line 359
+        I_word_len = getCurrent().length();
+        // [, line 361
+        bra = cursor;
+        // substring, line 361
+        among_var = find_among(a_4, a_4.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 361
+        ket = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 362
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // <-, line 362
+                slice_from("\u0623");
+                break;
+            case 2:
+                // (, line 363
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // <-, line 363
+                slice_from("\u0622");
+                break;
+            case 3:
+                // (, line 364
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // <-, line 364
+                slice_from("\u0623");
+                break;
+            case 4:
+                // (, line 365
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // <-, line 365
+                slice_from("\u0627");
+                break;
+            case 5:
+                // (, line 366
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // <-, line 366
+                slice_from("\u0625");
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Prefix_Step2() {
+        int among_var;
+        // (, line 371
+        I_word_len = getCurrent().length();
+        // not, line 373
+        {
+            int v_1 = cursor;
+            lab0: do {
+                // literal, line 373
+                if (!(eq_s("\u0641\u0627".length(),"\u0641\u0627")))
+                {
+                    break lab0;
+                }
+                return false;
+            } while (false);
+            cursor = v_1;
+        }
+        // not, line 374
+        {
+            int v_2 = cursor;
+            lab1: do {
+                // literal, line 374
+                if (!(eq_s("\u0648\u0627".length(),"\u0648\u0627")))
+                {
+                    break lab1;
+                }
+                return false;
+            } while (false);
+            cursor = v_2;
+        }
+        // [, line 375
+        bra = cursor;
+        // substring, line 375
+        among_var = find_among(a_5, a_5.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 375
+        ket = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 376
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // delete, line 376
+                slice_del();
+                break;
+            case 2:
+                // (, line 377
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // delete, line 377
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Prefix_Step3a_Noun() {
+        int among_var;
+        // (, line 381
+        I_word_len = getCurrent().length();
+        // [, line 383
+        bra = cursor;
+        // substring, line 383
+        among_var = find_among(a_6, a_6.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 383
+        ket = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 384
+                if (!(I_word_len > 5))
+                {
+                    return false;
+                }
+                // delete, line 384
+                slice_del();
+                break;
+            case 2:
+                // (, line 385
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // delete, line 385
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Prefix_Step3b_Noun() {
+        int among_var;
+        // (, line 389
+        I_word_len = getCurrent().length();
+        // not, line 391
+        {
+            int v_1 = cursor;
+            lab0: do {
+                // literal, line 391
+                if (!(eq_s("\u0628\u0627".length(),"\u0628\u0627")))
+                {
+                    break lab0;
+                }
+                return false;
+            } while (false);
+            cursor = v_1;
+        }
+        // [, line 392
+        bra = cursor;
+        // substring, line 392
+        among_var = find_among(a_7, a_7.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 392
+        ket = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 393
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // delete, line 393
+                slice_del();
+                break;
+            case 2:
+                // (, line 395
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // <-, line 395
+                slice_from("\u0628");
+                break;
+            case 3:
+                // (, line 396
+                if (!(I_word_len > 3))
+                {
+                    return false;
+                }
+                // <-, line 396
+                slice_from("\u0643");
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Prefix_Step3_Verb() {
+        int among_var;
+        // (, line 401
+        I_word_len = getCurrent().length();
+        // [, line 403
+        bra = cursor;
+        // substring, line 403
+        among_var = find_among(a_8, a_8.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 403
+        ket = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 405
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // <-, line 405
+                slice_from("\u064A");
+                break;
+            case 2:
+                // (, line 406
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // <-, line 406
+                slice_from("\u062A");
+                break;
+            case 3:
+                // (, line 407
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // <-, line 407
+                slice_from("\u0646");
+                break;
+            case 4:
+                // (, line 408
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // <-, line 408
+                slice_from("\u0623");
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Prefix_Step4_Verb() {
+        int among_var;
+        // (, line 412
+        I_word_len = getCurrent().length();
+        // [, line 414
+        bra = cursor;
+        // substring, line 414
+        among_var = find_among(a_9, a_9.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 414
+        ket = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 415
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // set is_verb, line 415
+                B_is_verb = true;
+                // unset is_noun, line 415
+                B_is_noun = false;
+                // <-, line 415
+                slice_from("\u0627\u0633\u062A");
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Noun_Step1a() {
+        int among_var;
+        // (, line 422
+        I_word_len = getCurrent().length();
+        // [, line 424
+        ket = cursor;
+        // substring, line 424
+        among_var = find_among_b(a_10, a_10.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 424
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 425
+                if (!(I_word_len >= 4))
+                {
+                    return false;
+                }
+                // delete, line 425
+                slice_del();
+                break;
+            case 2:
+                // (, line 426
+                if (!(I_word_len >= 5))
+                {
+                    return false;
+                }
+                // delete, line 426
+                slice_del();
+                break;
+            case 3:
+                // (, line 427
+                if (!(I_word_len >= 6))
+                {
+                    return false;
+                }
+                // delete, line 427
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Noun_Step1b() {
+        int among_var;
+        // (, line 430
+        I_word_len = getCurrent().length();
+        // [, line 432
+        ket = cursor;
+        // substring, line 432
+        among_var = find_among_b(a_11, a_11.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 432
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 433
+                if (!(I_word_len > 5))
+                {
+                    return false;
+                }
+                // delete, line 433
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Noun_Step2a() {
+        int among_var;
+        // (, line 437
+        I_word_len = getCurrent().length();
+        // [, line 439
+        ket = cursor;
+        // substring, line 439
+        among_var = find_among_b(a_12, a_12.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 439
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 440
+                if (!(I_word_len > 4))
+                {
+                    return false;
+                }
+                // delete, line 440
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Noun_Step2b() {
+        int among_var;
+        // (, line 444
+        I_word_len = getCurrent().length();
+        // [, line 446
+        ket = cursor;
+        // substring, line 446
+        among_var = find_among_b(a_13, a_13.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 446
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 447
+                if (!(I_word_len >= 5))
+                {
+                    return false;
+                }
+                // delete, line 447
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Noun_Step2c1() {
+        int among_var;
+        // (, line 451
+        I_word_len = getCurrent().length();
+        // [, line 453
+        ket = cursor;
+        // substring, line 453
+        among_var = find_among_b(a_14, a_14.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 453
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 454
+                if (!(I_word_len >= 4))
+                {
+                    return false;
+                }
+                // delete, line 454
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Noun_Step2c2() {
+        int among_var;
+        // (, line 457
+        I_word_len = getCurrent().length();
+        // [, line 459
+        ket = cursor;
+        // substring, line 459
+        among_var = find_among_b(a_15, a_15.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 459
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 460
+                if (!(I_word_len >= 4))
+                {
+                    return false;
+                }
+                // delete, line 460
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Noun_Step3() {
+        int among_var;
+        // (, line 463
+        I_word_len = getCurrent().length();
+        // [, line 465
+        ket = cursor;
+        // substring, line 465
+        among_var = find_among_b(a_16, a_16.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 465
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 466
+                if (!(I_word_len >= 3))
+                {
+                    return false;
+                }
+                // delete, line 466
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Verb_Step1() {
+        int among_var;
+        // (, line 470
+        I_word_len = getCurrent().length();
+        // [, line 472
+        ket = cursor;
+        // substring, line 472
+        among_var = find_among_b(a_17, a_17.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 472
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 473
+                if (!(I_word_len >= 4))
+                {
+                    return false;
+                }
+                // delete, line 473
+                slice_del();
+                break;
+            case 2:
+                // (, line 474
+                if (!(I_word_len >= 5))
+                {
+                    return false;
+                }
+                // delete, line 474
+                slice_del();
+                break;
+            case 3:
+                // (, line 475
+                if (!(I_word_len >= 6))
+                {
+                    return false;
+                }
+                // delete, line 475
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Verb_Step2a() {
+        int among_var;
+        // (, line 478
+        I_word_len = getCurrent().length();
+        // [, line 480
+        ket = cursor;
+        // substring, line 480
+        among_var = find_among_b(a_18, a_18.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 480
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 481
+                if (!(I_word_len >= 4))
+                {
+                    return false;
+                }
+                // delete, line 481
+                slice_del();
+                break;
+            case 2:
+                // (, line 482
+                if (!(I_word_len >= 4))
+                {
+                    return false;
+                }
+                // delete, line 482
+                slice_del();
+                break;
+            case 3:
+                // (, line 483
+                if (!(I_word_len >= 5))
+                {
+                    return false;
+                }
+                // delete, line 483
+                slice_del();
+                break;
+            case 4:
+                // (, line 484
+                if (!(I_word_len > 5))
+                {
+                    return false;
+                }
+                // delete, line 484
+                slice_del();
+                break;
+            case 5:
+                // (, line 485
+                if (!(I_word_len >= 6))
+                {
+                    return false;
+                }
+                // delete, line 485
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Verb_Step2b() {
+        int among_var;
+        // (, line 489
+        I_word_len = getCurrent().length();
+        // [, line 491
+        ket = cursor;
+        // substring, line 491
+        among_var = find_among_b(a_19, a_19.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 491
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 492
+                if (!(I_word_len >= 5))
+                {
+                    return false;
+                }
+                // delete, line 492
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_Verb_Step2c() {
+        int among_var;
+        // (, line 497
+        I_word_len = getCurrent().length();
+        // [, line 499
+        ket = cursor;
+        // substring, line 499
+        among_var = find_among_b(a_20, a_20.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 499
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 500
+                if (!(I_word_len >= 4))
+                {
+                    return false;
+                }
+                // delete, line 500
+                slice_del();
+                break;
+            case 2:
+                // (, line 501
+                if (!(I_word_len >= 6))
+                {
+                    return false;
+                }
+                // delete, line 501
+                slice_del();
+                break;
+        }
+        return true;
+    }
+
+    private boolean r_Suffix_All_alef_maqsura() {
+        int among_var;
+        // (, line 505
+        I_word_len = getCurrent().length();
+        // [, line 507
+        ket = cursor;
+        // substring, line 507
+        among_var = find_among_b(a_21, a_21.length);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 507
+        bra = cursor;
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 508
+                // <-, line 508
+                slice_from("\u064A");
+                break;
+        }
+        return true;
+    }
+
+    public boolean stem() {
+        // (, line 515 -- stems the current word in five phases: Checks1, Normalize_pre, suffix steps (backwards), prefix steps, Normalize_post; the only return statement is 'return true'. The 'line N' tags presumably refer to the generating Snowball script.
+        // set is_noun, line 517 -- both word-class flags start out true
+        B_is_noun = true;
+        // set is_verb, line 518
+        B_is_verb = true;
+        // unset is_defined, line 519
+        B_is_defined = false;
+        // do, line 522 -- 'do' pattern used throughout: save the cursor, run the routine, ignore its result, restore the cursor
+        int v_1 = cursor;
+        lab0: do {
+            // call Checks1, line 522
+            if (!r_Checks1())
+            {
+                break lab0;
+            }
+        } while (false);
+        cursor = v_1;
+        // do, line 525 -- same save/run/restore pattern for Normalize_pre
+        int v_2 = cursor;
+        lab1: do {
+            // call Normalize_pre, line 525
+            if (!r_Normalize_pre())
+            {
+                break lab1;
+            }
+        } while (false);
+        cursor = v_2;
+        // backwards, line 528 -- the current cursor becomes limit_backward and the cursor moves to limit; positions below are saved as offsets from limit
+        limit_backward = cursor;
+        cursor = limit;
+        // (, line 528
+        // do, line 530 -- suffix phase; v_3 remembers the starting position as an offset from limit
+        int v_3 = limit - cursor;
+        lab2: do {
+            // (, line 530
+            // or, line 544 -- three alternatives tried in order: verb suffixes (lab4), noun suffixes (lab13), then Suffix_All_alef_maqsura; the first two leave via 'break lab3' on success
+            lab3: do {
+                int v_4 = limit - cursor;
+                lab4: do {
+                    // (, line 532
+                    // Boolean test is_verb, line 533 -- the verb alternative is skipped when the flag is false
+                    if (!(B_is_verb))
+                    {
+                        break lab4;
+                    }
+                    // (, line 534
+                    // or, line 539 -- verb alternatives: (Suffix_Verb_Step1 one or more times, then Step2a | Step2c | skip one position) | Step2b | Step2a; success leaves via 'break lab5'
+                    lab5: do {
+                        int v_5 = limit - cursor;
+                        lab6: do {
+                            // (, line 535
+                            // (, line 536
+                            // atleast, line 536 -- v_6 counts down the number of successful Suffix_Verb_Step1 calls still required (one)
+                            {
+                                int v_6 = 1;
+                                // atleast, line 536 -- repeat Suffix_Verb_Step1 until it fails, then restore the cursor saved before the failed call
+                                replab7: while(true)
+                                {
+                                    int v_7 = limit - cursor;
+                                    lab8: do {
+                                        // call Suffix_Verb_Step1, line 536
+                                        if (!r_Suffix_Verb_Step1())
+                                        {
+                                            break lab8;
+                                        }
+                                        v_6--;
+                                        continue replab7;
+                                    } while (false);
+                                    cursor = limit - v_7;
+                                    break replab7;
+                                }
+                                if (v_6 > 0) // no successful Suffix_Verb_Step1 call: give up on this alternative
+                                {
+                                    break lab6;
+                                }
+                            }
+                            // (, line 537
+                            // or, line 537 -- Step2a, else Step2c, else move the cursor back one position
+                            lab9: do {
+                                int v_8 = limit - cursor;
+                                lab10: do {
+                                    // call Suffix_Verb_Step2a, line 537
+                                    if (!r_Suffix_Verb_Step2a())
+                                    {
+                                        break lab10;
+                                    }
+                                    break lab9;
+                                } while (false);
+                                cursor = limit - v_8;
+                                lab11: do {
+                                    // call Suffix_Verb_Step2c, line 537
+                                    if (!r_Suffix_Verb_Step2c())
+                                    {
+                                        break lab11;
+                                    }
+                                    break lab9;
+                                } while (false);
+                                cursor = limit - v_8;
+                                // next, line 537 -- fallback: step back one position, or fail this alternative when already at limit_backward
+                                if (cursor <= limit_backward)
+                                {
+                                    break lab6;
+                                }
+                                cursor--;
+                            } while (false);
+                            break lab5;
+                        } while (false);
+                        cursor = limit - v_5;
+                        lab12: do {
+                            // call Suffix_Verb_Step2b, line 539
+                            if (!r_Suffix_Verb_Step2b())
+                            {
+                                break lab12;
+                            }
+                            break lab5;
+                        } while (false);
+                        cursor = limit - v_5;
+                        // call Suffix_Verb_Step2a, line 540 -- last verb alternative; on failure the whole verb branch is abandoned (break lab4)
+                        if (!r_Suffix_Verb_Step2a())
+                        {
+                            break lab4;
+                        }
+                    } while (false);
+                    break lab3;
+                } while (false);
+                cursor = limit - v_4;
+                lab13: do {
+                    // (, line 544
+                    // Boolean test is_noun, line 545 -- the noun alternative is skipped when the flag is false
+                    if (!(B_is_noun))
+                    {
+                        break lab13;
+                    }
+                    // (, line 546
+                    // try, line 548 -- optional group: if every alternative inside fails, the cursor is restored from v_9 and processing continues with Suffix_Noun_Step3
+                    int v_9 = limit - cursor;
+                    lab14: do {
+                        // (, line 548
+                        // or, line 550 -- five alternatives in order: Step2c2 | (not is_defined, Step1a, then 2a|2b|2c1|skip) | (Step1b, then 2a|2b|2c1) | (not is_defined, Step2a) | Step2b; success leaves via 'break lab15'
+                        lab15: do {
+                            int v_10 = limit - cursor;
+                            lab16: do {
+                                // call Suffix_Noun_Step2c2, line 549
+                                if (!r_Suffix_Noun_Step2c2())
+                                {
+                                    break lab16;
+                                }
+                                break lab15;
+                            } while (false);
+                            cursor = limit - v_10;
+                            lab17: do {
+                                // (, line 550
+                                // not, line 550 -- this alternative only proceeds while is_defined is false; a true flag abandons it (break lab17)
+                                lab18: do {
+                                    // Boolean test is_defined, line 550
+                                    if (!(B_is_defined))
+                                    {
+                                        break lab18;
+                                    }
+                                    break lab17;
+                                } while (false);
+                                // call Suffix_Noun_Step1a, line 550
+                                if (!r_Suffix_Noun_Step1a())
+                                {
+                                    break lab17;
+                                }
+                                // (, line 550
+                                // or, line 552 -- Step2a, else Step2b, else Step2c1, else move the cursor back one position
+                                lab19: do {
+                                    int v_12 = limit - cursor;
+                                    lab20: do {
+                                        // call Suffix_Noun_Step2a, line 551
+                                        if (!r_Suffix_Noun_Step2a())
+                                        {
+                                            break lab20;
+                                        }
+                                        break lab19;
+                                    } while (false);
+                                    cursor = limit - v_12;
+                                    lab21: do {
+                                        // call Suffix_Noun_Step2b, line 552
+                                        if (!r_Suffix_Noun_Step2b())
+                                        {
+                                            break lab21;
+                                        }
+                                        break lab19;
+                                    } while (false);
+                                    cursor = limit - v_12;
+                                    lab22: do {
+                                        // call Suffix_Noun_Step2c1, line 553
+                                        if (!r_Suffix_Noun_Step2c1())
+                                        {
+                                            break lab22;
+                                        }
+                                        break lab19;
+                                    } while (false);
+                                    cursor = limit - v_12;
+                                    // next, line 554 -- fallback: step back one position, or fail this alternative when already at limit_backward
+                                    if (cursor <= limit_backward)
+                                    {
+                                        break lab17;
+                                    }
+                                    cursor--;
+                                } while (false);
+                                break lab15;
+                            } while (false);
+                            cursor = limit - v_10;
+                            lab23: do {
+                                // (, line 555
+                                // call Suffix_Noun_Step1b, line 555
+                                if (!r_Suffix_Noun_Step1b())
+                                {
+                                    break lab23;
+                                }
+                                // (, line 555
+                                // or, line 557 -- Step2a, else Step2b, else Step2c1; unlike the group after Step1a there is no skip-one-position fallback here
+                                lab24: do {
+                                    int v_13 = limit - cursor;
+                                    lab25: do {
+                                        // call Suffix_Noun_Step2a, line 556
+                                        if (!r_Suffix_Noun_Step2a())
+                                        {
+                                            break lab25;
+                                        }
+                                        break lab24;
+                                    } while (false);
+                                    cursor = limit - v_13;
+                                    lab26: do {
+                                        // call Suffix_Noun_Step2b, line 557
+                                        if (!r_Suffix_Noun_Step2b())
+                                        {
+                                            break lab26;
+                                        }
+                                        break lab24;
+                                    } while (false);
+                                    cursor = limit - v_13;
+                                    // call Suffix_Noun_Step2c1, line 558
+                                    if (!r_Suffix_Noun_Step2c1())
+                                    {
+                                        break lab23;
+                                    }
+                                } while (false);
+                                break lab15;
+                            } while (false);
+                            cursor = limit - v_10;
+                            lab27: do {
+                                // (, line 559
+                                // not, line 559 -- again only proceeds while is_defined is false
+                                lab28: do {
+                                    // Boolean test is_defined, line 559
+                                    if (!(B_is_defined))
+                                    {
+                                        break lab28;
+                                    }
+                                    break lab27;
+                                } while (false);
+                                // call Suffix_Noun_Step2a, line 559
+                                if (!r_Suffix_Noun_Step2a())
+                                {
+                                    break lab27;
+                                }
+                                break lab15;
+                            } while (false);
+                            cursor = limit - v_10;
+                            // (, line 560
+                            // call Suffix_Noun_Step2b, line 560 -- last alternative of the 'try'; failure restores the cursor saved in v_9
+                            if (!r_Suffix_Noun_Step2b())
+                            {
+                                cursor = limit - v_9;
+                                break lab14;
+                            }
+                        } while (false);
+                    } while (false);
+                    // call Suffix_Noun_Step3, line 562 -- must succeed for the noun alternative to count; otherwise control falls through to Suffix_All_alef_maqsura
+                    if (!r_Suffix_Noun_Step3())
+                    {
+                        break lab13;
+                    }
+                    break lab3;
+                } while (false);
+                cursor = limit - v_4;
+                // call Suffix_All_alef_maqsura, line 568 -- final alternative; on failure the suffix phase simply ends (break lab2)
+                if (!r_Suffix_All_alef_maqsura())
+                {
+                    break lab2;
+                }
+            } while (false);
+        } while (false);
+        cursor = limit - v_3; // restore the position saved before the suffix phase (overwritten by the next statement)
+        cursor = limit_backward; // cursor returns to limit_backward, which was set from the cursor at 'backwards' above
+        // do, line 573 -- prefix phase, using plain cursor values again; cursor is restored from v_15 afterwards
+        int v_15 = cursor;
+        lab29: do {
+            // (, line 573
+            // try, line 574 -- optional: a failed Prefix_Step1 only resets the cursor
+            int v_16 = cursor;
+            lab30: do {
+                // call Prefix_Step1, line 574
+                if (!r_Prefix_Step1())
+                {
+                    cursor = v_16;
+                    break lab30;
+                }
+            } while (false);
+            // try, line 575 -- optional: a failed Prefix_Step2 only resets the cursor
+            int v_17 = cursor;
+            lab31: do {
+                // call Prefix_Step2, line 575
+                if (!r_Prefix_Step2())
+                {
+                    cursor = v_17;
+                    break lab31;
+                }
+            } while (false);
+            // (, line 576
+            // or, line 577 -- Prefix_Step3a_Noun | (is_noun and Prefix_Step3b_Noun) | (is_verb, optional Prefix_Step3_Verb, then Prefix_Step4_Verb)
+            lab32: do {
+                int v_18 = cursor;
+                lab33: do {
+                    // call Prefix_Step3a_Noun, line 576
+                    if (!r_Prefix_Step3a_Noun())
+                    {
+                        break lab33;
+                    }
+                    break lab32;
+                } while (false);
+                cursor = v_18;
+                lab34: do {
+                    // (, line 577
+                    // Boolean test is_noun, line 577
+                    if (!(B_is_noun))
+                    {
+                        break lab34;
+                    }
+                    // call Prefix_Step3b_Noun, line 577
+                    if (!r_Prefix_Step3b_Noun())
+                    {
+                        break lab34;
+                    }
+                    break lab32;
+                } while (false);
+                cursor = v_18;
+                // (, line 578
+                // Boolean test is_verb, line 578 -- last alternative; a false flag ends the prefix phase (break lab29)
+                if (!(B_is_verb))
+                {
+                    break lab29;
+                }
+                // try, line 578 -- Prefix_Step3_Verb is optional; failure only resets the cursor
+                int v_19 = cursor;
+                lab35: do {
+                    // call Prefix_Step3_Verb, line 578
+                    if (!r_Prefix_Step3_Verb())
+                    {
+                        cursor = v_19;
+                        break lab35;
+                    }
+                } while (false);
+                // call Prefix_Step4_Verb, line 578
+                if (!r_Prefix_Step4_Verb())
+                {
+                    break lab29;
+                }
+            } while (false);
+        } while (false);
+        cursor = v_15;
+        // do, line 583 -- same save/run/restore pattern for Normalize_post
+        int v_20 = cursor;
+        lab36: do {
+            // call Normalize_post, line 583
+            if (!r_Normalize_post())
+            {
+                break lab36;
+            }
+        } while (false);
+        cursor = v_20;
+        return true;
+    }
+
+    /**
+     * Compares by type only: any {@code ArabicStemmer} is considered equal to any other.
+     *
+     * @param o the object to compare with; {@code null} yields {@code false}
+     * @return {@code true} if {@code o} is an {@code ArabicStemmer}, {@code false} otherwise
+     */
+    @Override
+    public boolean equals( Object o ) {
+        return o instanceof ArabicStemmer;
+    }
+
+    /**
+     * Returns a hash derived from the class name only, so every instance yields the same
+     * value, in line with the type-only {@code equals}.
+     *
+     * @return the hash code of {@code ArabicStemmer.class.getName()}
+     */
+    @Override
+    public int hashCode() {
+        return ArabicStemmer.class.getName().hashCode();
+    }
+
+
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c567d4f/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
index d215e02..bba8d33 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
@@ -36,6 +36,7 @@ public class TestSnowballVocab extends LuceneTestCase {
    * Run all languages against their snowball vocabulary tests.
    */
   public void testStemmers() throws IOException {
+    assertCorrectOutput("Arabic", "arabic");
     assertCorrectOutput("Danish", "danish");
     assertCorrectOutput("Dutch", "dutch");
     assertCorrectOutput("English", "english");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c567d4f/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocabData.zip
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocabData.zip b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocabData.zip
index 8831d8a..e3cae65 100644
Binary files a/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocabData.zip and b/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocabData.zip differ