You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/10/16 05:55:33 UTC

svn commit: r1184754 [1/33] - in /lucene/dev/trunk: dev-tools/eclipse/ solr/ solr/contrib/langid/ solr/contrib/langid/lib/ solr/contrib/langid/src/java/org/apache/solr/update/processor/ solr/contrib/langid/src/resources/ solr/contrib/langid/src/resourc...

Author: rmuir
Date: Sun Oct 16 03:55:30 2011
New Revision: 1184754

URL: http://svn.apache.org/viewvc?rev=1184754&view=rev
Log:
SOLR-2839: add alternative language detection impl

Added:
    lucene/dev/trunk/solr/contrib/langid/lib/
    lucene/dev/trunk/solr/contrib/langid/lib/jsonic-1.2.0.jar   (with props)
    lucene/dev/trunk/solr/contrib/langid/lib/jsonic-LICENSE.txt   (with props)
    lucene/dev/trunk/solr/contrib/langid/lib/langdetect-LICENSE.txt   (with props)
    lucene/dev/trunk/solr/contrib/langid/lib/langdetect-r111.jar   (with props)
    lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java   (with props)
    lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java   (with props)
    lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java   (with props)
    lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
      - copied, changed from r1183754, lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactory.java
    lucene/dev/trunk/solr/contrib/langid/src/resources/
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/af
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ar
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/bg
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/bn
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/cs
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/da
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/de
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/el
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/en
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/es
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/et
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/fa
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/fi
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/fr
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/gu
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/he
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/hi
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/hr
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/hu
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/id
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/it
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ja
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/kn
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ko
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/lt
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/lv
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/mk
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ml
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/mr
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ne
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/nl
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/no
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/pa
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/pl
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/pt
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ro
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ru
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/sk
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/sl
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/so
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/sq
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/sv
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/sw
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ta
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/te
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/th
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/tl
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/tr
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/uk
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/ur
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/vi
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/zh-cn
    lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/zh-tw
    lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java   (with props)
    lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
      - copied, changed from r1183754, lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTest.java
    lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java   (with props)
Removed:
    lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactory.java
    lucene/dev/trunk/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTest.java
Modified:
    lucene/dev/trunk/dev-tools/eclipse/dot.classpath
    lucene/dev/trunk/solr/NOTICE.txt
    lucene/dev/trunk/solr/contrib/langid/CHANGES.txt
    lucene/dev/trunk/solr/contrib/langid/build.xml
    lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
    lucene/dev/trunk/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml
    lucene/dev/trunk/solr/example/solr/conf/solrconfig.xml

Modified: lucene/dev/trunk/dev-tools/eclipse/dot.classpath
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/eclipse/dot.classpath?rev=1184754&r1=1184753&r2=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/eclipse/dot.classpath (original)
+++ lucene/dev/trunk/dev-tools/eclipse/dot.classpath Sun Oct 16 03:55:30 2011
@@ -33,12 +33,12 @@
 	<classpathentry kind="src" path="modules/analysis/stempel/src/resources"/>
 	<classpathentry kind="src" path="modules/analysis/stempel/src/test"/>
 	<classpathentry kind="src" path="modules/analysis/morfologik/src/java"/>
-	<classpathentry kind="src" path="modules/analysis/morfologik/src/test"/>	
+	<classpathentry kind="src" path="modules/analysis/morfologik/src/test"/>
 	<classpathentry kind="src" path="modules/benchmark/src/java"/>
 	<classpathentry kind="src" path="modules/benchmark/src/test"/>
 	<classpathentry kind="src" path="modules/facet/src/java"/>
- 	<classpathentry kind="src" path="modules/facet/src/examples"/>
- 	<classpathentry kind="src" path="modules/facet/src/test"/>
+	<classpathentry kind="src" path="modules/facet/src/examples"/>
+	<classpathentry kind="src" path="modules/facet/src/test"/>
 	<classpathentry kind="src" path="modules/grouping/src/java"/>
 	<classpathentry kind="src" path="modules/grouping/src/test"/>
 	<classpathentry kind="src" path="modules/join/src/java"/>
@@ -71,18 +71,19 @@
 	<classpathentry kind="src" path="solr/contrib/extraction/src/java"/>
 	<classpathentry kind="src" path="solr/contrib/extraction/src/test"/>
 	<classpathentry kind="src" path="solr/contrib/extraction/src/test-files"/>
-  <classpathentry kind="src" path="solr/contrib/langid/src/java"/>
-  <classpathentry kind="src" path="solr/contrib/langid/src/test"/>
-  <classpathentry kind="src" path="solr/contrib/langid/src/test-files"/>
+	<classpathentry kind="src" path="solr/contrib/langid/src/java"/>
+	<classpathentry kind="src" path="solr/contrib/langid/src/test"/>
+	<classpathentry kind="src" path="solr/contrib/langid/src/test-files"/>
 	<classpathentry kind="src" path="solr/contrib/uima/src/java"/>
 	<classpathentry kind="src" path="solr/contrib/uima/src/resources"/>
 	<classpathentry kind="src" path="solr/contrib/uima/src/test"/>
 	<classpathentry kind="src" path="solr/contrib/uima/src/test-files"/>
+	<classpathentry kind="src" path="solr/contrib/langid/src/resources"/>
 	<classpathentry kind="lib" path="lucene/lib/ant-1.7.1.jar"/>
 	<classpathentry kind="lib" path="lucene/lib/ant-junit-1.7.1.jar"/>
 	<classpathentry kind="lib" path="lucene/lib/junit-4.7.jar"/>
 	<classpathentry kind="lib" path="lucene/contrib/sandbox/lib/jakarta-regexp-1.4.jar"/>
-	<classpathentry kind="lib" path="modules/analysis/icu/lib/icu4j-4_8.jar"/>
+	<classpathentry kind="lib" path="modules/analysis/icu/lib/icu4j-4_8_1_1.jar"/>
 	<classpathentry kind="lib" path="modules/analysis/phonetic/lib/commons-codec-1.4.jar"/>
 	<classpathentry kind="lib" path="modules/analysis/morfologik/lib/morfologik-fsa-1.5.2.jar"/>
 	<classpathentry kind="lib" path="modules/analysis/morfologik/lib/morfologik-polish-1.5.2.jar"/>
@@ -156,6 +157,8 @@
 	<classpathentry kind="lib" path="solr/contrib/uima/lib/uima-an-tagger-2.3.1.jar"/>
 	<classpathentry kind="lib" path="solr/contrib/uima/lib/uima-an-wst-2.3.1.jar"/>
 	<classpathentry kind="lib" path="solr/contrib/uima/lib/uimaj-core-2.3.1.jar"/>
+	<classpathentry kind="lib" path="solr/contrib/langid/lib/langdetect-r111.jar"/>
+	<classpathentry kind="lib" path="solr/contrib/langid/lib/jsonic-1.2.0.jar"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
 	<classpathentry kind="output" path="bin"/>
 </classpath>

Modified: lucene/dev/trunk/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/NOTICE.txt?rev=1184754&r1=1184753&r2=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/NOTICE.txt (original)
+++ lucene/dev/trunk/solr/NOTICE.txt Sun Oct 16 03:55:30 2011
@@ -253,6 +253,18 @@ Copyright 2004 Sun Microsystems, Inc. (R
 
 Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/)
  
+=========================================================================
+==  Language Detection Notices                                         ==
+=========================================================================
+
+The following notices apply to the libraries in contrib/langid/lib:
+
+This product includes software developed by Cybozu Labs, Inc.
+(c)2010 All rights reserved by Cybozu Labs, Inc.
+http://code.google.com/p/language-detection/
+
+This product includes software developed by the Jsonic project:
+http://sourceforge.jp/projects/jsonic/
 
 =========================================================================
 ==     Carrot2 Notice                                                  ==

Modified: lucene/dev/trunk/solr/contrib/langid/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/CHANGES.txt?rev=1184754&r1=1184753&r2=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/contrib/langid/CHANGES.txt Sun Oct 16 03:55:30 2011
@@ -13,3 +13,6 @@ Initial release.  See README.txt.
 
 * SOLR-1979: New contrib "langid". Adds language identification capabilities as an 
   Update Processor, using Tika's LanguageIdentifier (janhoy, Tommaso Teofili, gsingers)
+
+* SOLR-2839: Add alternative implementation supporting 53 languages, 
+  based on http://code.google.com/p/language-detection/ (rmuir)

Modified: lucene/dev/trunk/solr/contrib/langid/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/build.xml?rev=1184754&r1=1184753&r2=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/build.xml (original)
+++ lucene/dev/trunk/solr/contrib/langid/build.xml Sun Oct 16 03:55:30 2011
@@ -27,6 +27,7 @@
 
 	<path id="classpath">
     <fileset dir="../extraction/lib" includes="*.jar"/>
+    <fileset dir="lib" includes="*.jar"/>
     <path refid="solr.base.classpath"/>   
   </path>
 

Added: lucene/dev/trunk/solr/contrib/langid/lib/jsonic-1.2.0.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/lib/jsonic-1.2.0.jar?rev=1184754&view=auto
==============================================================================
Binary file - no diff available.

Added: lucene/dev/trunk/solr/contrib/langid/lib/jsonic-LICENSE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/lib/jsonic-LICENSE.txt?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/lib/jsonic-LICENSE.txt (added)
+++ lucene/dev/trunk/solr/contrib/langid/lib/jsonic-LICENSE.txt Sun Oct 16 03:55:30 2011
@@ -0,0 +1,201 @@
+Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Added: lucene/dev/trunk/solr/contrib/langid/lib/langdetect-LICENSE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/lib/langdetect-LICENSE.txt?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/lib/langdetect-LICENSE.txt (added)
+++ lucene/dev/trunk/solr/contrib/langid/lib/langdetect-LICENSE.txt Sun Oct 16 03:55:30 2011
@@ -0,0 +1,13 @@
+(c)2010 All rights reserved by Cybozu Labs, Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       <a href="http://www.apache.org/licenses/LICENSE-2.0">http://www.apache.org/licenses/LICENSE-2.0</a>
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Added: lucene/dev/trunk/solr/contrib/langid/lib/langdetect-r111.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/lib/langdetect-r111.jar?rev=1184754&view=auto
==============================================================================
Binary file - no diff available.

Added: lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java (added)
+++ lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java Sun Oct 16 03:55:30 2011
@@ -0,0 +1,66 @@
+package org.apache.solr.update.processor;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+
+import com.cybozu.labs.langdetect.Detector;
+import com.cybozu.labs.langdetect.DetectorFactory;
+import com.cybozu.labs.langdetect.LangDetectException;
+import com.cybozu.labs.langdetect.Language;
+
+/**
+ * Identifies the language of a set of input fields using http://code.google.com/p/language-detection
+ * <p>
+ * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
+ * @since 3.5
+ */
+public class LangDetectLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpdateProcessor {
+  
+  public LangDetectLanguageIdentifierUpdateProcessor(SolrQueryRequest req, 
+      SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    super(req, rsp, next);
+  }
+
+  @Override
+  protected List<DetectedLanguage> detectLanguage(String content) {
+    if (content.trim().length() == 0) { // to be consistent with the tika impl?
+      log.debug("No input text to detect language from, returning empty list");
+      return Collections.emptyList();
+    }
+    
+    try {
+      Detector detector = DetectorFactory.create();
+      detector.append(content);
+      ArrayList<Language> langlist = detector.getProbabilities();
+      ArrayList<DetectedLanguage> solrLangList = new ArrayList<DetectedLanguage>();
+      for (Language l: langlist) {
+        solrLangList.add(new DetectedLanguage(l.lang, l.prob));
+      }
+      return solrLangList;
+    } catch (LangDetectException e) {
+      log.debug("Could not determine language, returning empty list: ", e);
+      return Collections.emptyList();
+    }
+  }
+}

Added: lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java (added)
+++ lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java Sun Oct 16 03:55:30 2011
@@ -0,0 +1,136 @@
+package org.apache.solr.update.processor;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+import com.cybozu.labs.langdetect.DetectorFactory;
+import com.cybozu.labs.langdetect.LangDetectException;
+
+/**
+ * Creates {@link LangDetectLanguageIdentifierUpdateProcessor} instances, which
+ * identify the language of a set of input fields using 
+ * http://code.google.com/p/language-detection
+ * <p>
+ * The UpdateProcessorChain config entry can take a number of parameters
+ * which may also be passed as HTTP parameters on the update request
+ * and override the defaults. Here is the simplest processor config possible:
+ * 
+ * <pre class="prettyprint" >
+ * &lt;processor class=&quot;org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory&quot;&gt;
+ *   &lt;str name=&quot;langid.fl&quot;&gt;title,text&lt;/str&gt;
+ *   &lt;str name=&quot;langid.langField&quot;&gt;language_s&lt;/str&gt;
+ * &lt;/processor&gt;
+ * </pre>
+ * The language profiles are read from the classpath the first time a factory
+ * is initialized; see {@link #loadData()}.
+ * <p>
+ * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
+ * @since 3.5
+ */
+public class LangDetectLanguageIdentifierUpdateProcessorFactory extends
+        UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams {
+
+  /**
+   * Built from the "defaults" entry of the init args when that entry is a
+   * NamedList, otherwise from the init args themselves; null if init args were null.
+   */
+  protected SolrParams defaults;
+  /** Built from the "appends" entry of the init args; null when that entry is absent. */
+  protected SolrParams appends;
+  /** Built from the "invariants" entry of the init args; null when that entry is absent. */
+  protected SolrParams invariants;
+
+  /** Intentionally empty: this factory does nothing with the core. */
+  @Override
+  public void inform(SolrCore core) {
+  }
+
+  /**
+   * The UpdateRequestProcessor may be initialized in solrconfig.xml similarly
+   * to a RequestHandler, with defaults, appends and invariants.
+   * <p>
+   * The language profiles are loaded (see {@link #loadData()}) before the
+   * arguments are read.
+   *
+   * @param args a NamedList with the configuration parameters, may be null
+   * @throws RuntimeException if loading the language profiles fails
+   */
+  @SuppressWarnings("rawtypes")
+  public void init( NamedList args )
+  {
+    try {
+      loadData();
+    } catch (Exception e) {
+      throw new RuntimeException("Couldn't load profile data, will return empty languages always!", e);
+    }
+    if (args != null) {
+      Object o;
+      o = args.get("defaults");
+      // instanceof is false for null, so no separate null check is needed
+      if (o instanceof NamedList) {
+        defaults = SolrParams.toSolrParams((NamedList) o);
+      } else {
+        // no explicit "defaults" section: treat all init args as defaults
+        defaults = SolrParams.toSolrParams(args);
+      }
+      o = args.get("appends");
+      if (o instanceof NamedList) {
+        appends = SolrParams.toSolrParams((NamedList) o);
+      }
+      o = args.get("invariants");
+      if (o instanceof NamedList) {
+        invariants = SolrParams.toSolrParams((NamedList) o);
+      }
+    }
+  }
+
+  /**
+   * Creates a processor for one update request.
+   *
+   * @param req the request; when non-null, the configured defaults, appends
+   *            and invariants are applied to it through
+   *            {@link SolrPluginUtils#setDefaults}
+   * @param rsp the response, passed on to the processor
+   * @param next the next processor in the chain, passed on to the processor
+   * @return a new {@link LangDetectLanguageIdentifierUpdateProcessor}
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+                                            SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    // Process defaults, appends and invariants if we got a request
+    if(req != null) {
+      SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
+    }
+    return new LangDetectLanguageIdentifierUpdateProcessor(req, rsp, next);
+  }
+  
+  
+  // DetectorFactory is totally global, so we only want to do this once... ever!!!
+  // Only read and written inside the synchronized loadData().
+  static boolean loaded;
+  
+  // profiles we will load from classpath
+  static final String[] languages = {
+    "af", "ar", "bg", "bn", "cs", "da", "de", "el", "en", "es", "et", "fa", "fi", "fr", "gu",
+    "he", "hi", "hr", "hu", "id", "it", "ja", "kn", "ko", "lt", "lv", "mk", "ml", "mr", "ne",
+    "nl", "no", "pa", "pl", "pt", "ro", "ru", "sk", "sl", "so", "sq", "sv", "sw", "ta", "te",
+    "th", "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw"
+  };
+
+  /**
+   * Reads every profile named in {@link #languages} from the
+   * <code>langdetect-profiles/</code> classpath directory (as UTF-8) and hands
+   * them to {@link DetectorFactory#loadProfile}.
+   * <p>
+   * Only the first call does any work. The flag is raised before the attempt,
+   * so a call that fails is not retried by later calls.
+   *
+   * @throws IOException if a profile resource is missing or cannot be read
+   * @throws LangDetectException if the detector factory rejects the profiles
+   */
+  public static synchronized void loadData() throws IOException, LangDetectException {
+    if (loaded) {
+      return;
+    }
+    loaded = true;
+    List<String> profileData = new ArrayList<String>(languages.length);
+    Charset encoding = Charset.forName("UTF-8");
+    for (String language : languages) {
+      String resource = "langdetect-profiles/" + language;
+      InputStream stream = LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream(resource);
+      if (stream == null) {
+        // getResourceAsStream signals "not found" with null; fail with the profile name
+        // instead of an anonymous NullPointerException from the reader constructor.
+        throw new IOException("Language profile not found on classpath: " + resource);
+      }
+      BufferedReader reader = new BufferedReader(new InputStreamReader(stream, encoding));
+      try {
+        profileData.add(new String(IOUtils.toCharArray(reader)));
+      } finally {
+        // release the stream even when reading the profile fails
+        reader.close();
+      }
+    }
+    DetectorFactory.loadProfile(profileData);
+  }
+}
\ No newline at end of file

Modified: lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java?rev=1184754&r1=1184753&r2=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java Sun Oct 16 03:55:30 2011
@@ -26,7 +26,6 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.update.AddUpdateCommand;
-import org.apache.tika.language.LanguageIdentifier;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -40,15 +39,15 @@ import java.util.regex.Pattern;
 
 
 /**
- * Identifies the language of a set of input fields using Tika's
- * LanguageIdentifier. Also supports mapping of field names based
+ * Identifies the language of a set of input fields. 
+ * Also supports mapping of field names based
  * on detected language. 
- * The tika-core-x.y.jar must be on the classpath
  * <p>
  * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
  * @since 3.5
+ * @lucene.experimental
  */
-public class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
+public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
 
   protected final static Logger log = LoggerFactory
           .getLogger(LanguageIdentifierUpdateProcessor.class);
@@ -300,23 +299,7 @@ public class LanguageIdentifierUpdatePro
    * @param content The content to identify
    * @return List of detected language(s) according to RFC-3066
    */
-  protected List<DetectedLanguage> detectLanguage(String content) {
-    List<DetectedLanguage> languages = new ArrayList<DetectedLanguage>();
-    if(content.trim().length() != 0) { 
-      LanguageIdentifier identifier = new LanguageIdentifier(content);
-      // FIXME: Hack - we get the distance from toString and calculate our own certainty score
-      Double distance = Double.parseDouble(tikaSimilarityPattern.matcher(identifier.toString()).replaceFirst("$1"));
-      // This formula gives: 0.02 => 0.8, 0.1 => 0.5 which is a better sweetspot than isReasonablyCertain()
-      Double certainty = 1 - (5 * distance); 
-      certainty = (certainty < 0) ? 0 : certainty;
-      DetectedLanguage language = new DetectedLanguage(identifier.getLanguage(), certainty);
-      languages.add(language);
-      log.debug("Language detected as "+language+" with a certainty of "+language.getCertainty()+" (Tika distance="+identifier.toString()+")");
-    } else {
-      log.debug("No input text to detect language from, returning empty list");
-    }
-    return languages;
-  }
+  protected abstract List<DetectedLanguage> detectLanguage(String content);
 
   /**
    * Chooses a language based on the list of candidates detected 

Added: lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java (added)
+++ lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java Sun Oct 16 03:55:30 2011
@@ -0,0 +1,60 @@
+package org.apache.solr.update.processor;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.tika.language.LanguageIdentifier;
+
+/**
+ * Identifies the language of a set of input fields using Tika's
+ * LanguageIdentifier.
+ * The tika-core-x.y.jar must be on the classpath
+ * <p>
+ * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
+ * @since 3.5
+ */
+public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpdateProcessor {
+
+  public TikaLanguageIdentifierUpdateProcessor(SolrQueryRequest req,
+      SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    super(req, rsp, next);
+  }
+  
+  @Override
+  protected List<DetectedLanguage> detectLanguage(String content) {
+    List<DetectedLanguage> languages = new ArrayList<DetectedLanguage>();
+    if(content.trim().length() != 0) { 
+      LanguageIdentifier identifier = new LanguageIdentifier(content);
+      // FIXME: Hack - we get the distance from toString and calculate our own certainty score
+      Double distance = Double.parseDouble(tikaSimilarityPattern.matcher(identifier.toString()).replaceFirst("$1"));
+      // This formula gives: 0.02 => 0.8, 0.1 => 0.5 which is a better sweetspot than isReasonablyCertain()
+      Double certainty = 1 - (5 * distance); 
+      certainty = (certainty < 0) ? 0 : certainty;
+      DetectedLanguage language = new DetectedLanguage(identifier.getLanguage(), certainty);
+      languages.add(language);
+      log.debug("Language detected as "+language+" with a certainty of "+language.getCertainty()+" (Tika distance="+identifier.toString()+")");
+    } else {
+      log.debug("No input text to detect language from, returning empty list");
+    }
+    return languages;
+  }
+}

Copied: lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java (from r1183754, lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactory.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java?p2=lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java&p1=lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactory.java&r1=1183754&r2=1184754&rev=1184754&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactory.java (original)
+++ lucene/dev/trunk/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java Sun Oct 16 03:55:30 2011
@@ -34,7 +34,7 @@ import org.apache.solr.util.plugin.SolrC
  * and override the defaults. Here is the simplest processor config possible:
  * 
  * <pre class="prettyprint" >
- * &lt;processor class=&quot;org.apache.solr.update.processor.LanguageIdentifierUpdateProcessorFactory&quot;&gt;
+ * &lt;processor class=&quot;org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory&quot;&gt;
  *   &lt;str name=&quot;langid.fl&quot;&gt;title,text&lt;/str&gt;
  *   &lt;str name=&quot;langid.langField&quot;&gt;language_s&lt;/str&gt;
  * &lt;/processor&gt;
@@ -42,7 +42,7 @@ import org.apache.solr.util.plugin.SolrC
  * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
  * @since 3.5
  */
-public class LanguageIdentifierUpdateProcessorFactory extends
+public class TikaLanguageIdentifierUpdateProcessorFactory extends
         UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams {
 
   protected SolrParams defaults;
@@ -87,7 +87,7 @@ public class LanguageIdentifierUpdatePro
     if(req != null) {
       SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
     }
-    return new LanguageIdentifierUpdateProcessor(req, rsp, next);
+    return new TikaLanguageIdentifierUpdateProcessor(req, rsp, next);
   }
 
 

Added: lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/af
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/af?rev=1184754&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/af (added)
+++ lucene/dev/trunk/solr/contrib/langid/src/resources/org/apache/solr/update/processor/langdetect-profiles/af Sun Oct 16 03:55:30 2011
@@ -0,0 +1 @@
+{"freq":{"D":9246,"E":2445,"F":2510,"G":3299,"A":6930,"B":3706,"C":2451,"L":2519,"M":3951,"N":3334,"O":2514,"H":3034,"I":2837,"J":2196,"K":3663,"U":687,"T":2336,"W":2258,"V":2714,"Q":182,"P":3097,"S":8234,"R":3039,"Y":252,"X":214,"Z":422,"f":13583,"g":42805,"d":77385,"Feb":207,"e":240974,"b":21626,"c":4896,"a":128566,"n":127153,"o":86673,"l":57433,"m":31352,"j":4048,"k":45378,"h":17527,"i":140621,"w":24930,"v":32618,"u":35166,"t":82606,"s":102389,"r":98861,"q":199,"p":23331,"z":1187,"y":11757,"x":1123,"ï":264,"ë":2903,"ê":1053,"é":765,"á":212,"ü":233,"ö":184,"ó":216,"Eur":318,"Eng":637," l":3565," m":7731," n":16000," o":12065," h":7358," i":23795," j":1325," k":6363," d":33601," e":13358," f":1200," g":11018,"р":242,"с":306," a":8747,"т":161," b":8379," c":434," u":1931," t":8537," w":13128," v":24617," p":4859," s":15482," r":3617," J":2155," K":3559," H":2961," I":2185," N":3120," O":2318," L":2396," M":3803," B":3554," C":2109," A":6365,"
  F":2371," G":3138," D":8986," E":2271,"л":219,"к":266," Z":368," Y":241,"и":371,"о":333,"н":199," S":7708,"Ger":200," R":2881,"в":199," Q":162," P":2912,"а":481," W":2205," V":2322," U":571,"е":266," T":2130,"Fra":1006,"A ":345,"Da":804,"Co":478,"Ch":621,"Du":1025,"Do":201,"De":763,"Di":5828,"Fe":367,"Eu":354,"En":721,"El":212,"Ge":659,"Ga":319,"I ":452,"Fr":1217,"Fo":165,"Fi":216,"II ":246,"C ":278,"Au":486,"Ar":425,"At":187,"As":201,"D ":158,"Ba":648,"Af":2087,"Am":566,"An":491,"Ap":353,"Al":628,"Bu":243,"Br":778,"Ca":399,"Bi":180,"Be":880,"Bo":481,"Bl":161,"Kr":224,"Ko":657,"Le":490,"Li":504,"La":658,"Lu":245,"Lo":347,"Me":800,"Mi":548,"Ma":1360,"Mu":186,"Mo":627,"Ni":257,"Ne":763,"Na":666,"No":1092,"Ok":339,"Ol":206,"Her":157,"Gr":1326,"Go":356,"Ha":534,"He":680,"II":369,"Hi":301,"Ho":503,"Hu":294,"Hy":550,"In":919,"Is":158,"It":218,"Ja":713,"Je":157,"Jo":565,"Ju":623,"Ka":1489,"Ki":194,"Ke":447,"Un":253,"Tu":248,"Tr":236,"To":272,"Th":313,"Te":2
 62,"Ta":276,"V ":280,"Sw":402,"Sy":292,"St":964,"Su":1701,"Wo":181,"Wi":534,"Wa":412,"We":720,"Vo":315,"Vr":251,"Vi":374,"Va":314,"Ve":689,"Pr":551,"S ":157,"Pe":310,"Pa":727,"Po":681,"Pi":230,"Os":236,"Oo":423,"Or":191,"Se":814,"Sc":197,"Si":387,"Sl":222,"Sk":201,"Sp":443,"So":680,"Ru":645,"Ry":194,"Sa":728,"Re":621,"Ri":222,"Ro":746,"SA":233,"Ra":223,"Gre":501,"Gri":383,"Gra":158,"b ":1179,"Gro":254,"a ":7054,"i ":2513,"gd":570,"ge":16432,"ga":1621,"gb":319,"fk":224,"fl":183,"fg":323,"ff":351,"fi":1111,"fh":169,"fs":1224,"fr":2334,"fu":174,"ft":300,"fo":725,"Int":180,"he":6229,"ha":2610,"gn":360,"gl":334,"gi":2135,"gh":921,"gg":418,"gu":592,"gt":1512,"gs":1974,"gr":3459,"go":1385,"dt":211,"du":998,"dw":506,"g ":10256,"ea":936,"eb":3497,"ec":406,"ed":5721,"de":18394,"dd":606,"dg":161,"di":29432,"dh":249,"dj":173,"dm":299,"do":2521,"ds":2062,"dr":1453,"ew":3034,"eu":3603,"ev":2016,"ey":309,"fa":570,"h ":864,"Ind":251,"fd":469,"fe":948,"eh":993,"eg":3187,"ef":995,"ee":12296,"
 el":15653,"ek":7920,"ei":5726,"ep":2393,"eo":692,"en":27638,"em":4686,"et":10282,"es":15156,"er":33393,"ca":479,"e ":78745,"by":1025,"br":1953,"bu":1057,"bo":2123,"bl":1117,"bi":1966,"bb":156,"be":8513,"db":222,"In ":319,"da":3617,"f ":4067,"ct":207,"co":446,"ck":502,"ci":340,"ch":1526,"ce":547,"c ":311,"az":190,"ay":279,"ba":2057,"d ":15502,"at":11369,"as":9342,"ar":11432,"aw":597,"av":407,"au":883,"ak":2797,"al":9554,"ai":1291,"aj":155,"ap":2087,"am":3989,"an":36357,"ac":615,"ad":4564,"aa":18307,"ab":1064,"ag":2729,"ah":292,"ae":907,"af":1901,"nu":917,"nt":6760,"ns":9243,"nr":212,"no":2885,"nn":1621,"ny":191,"nw":666,"nv":455,"oe":6026,"of":3797,"oc":387,"od":1636,"oa":178,"ob":729,"om":5480,"on":10533,"ok":2525,"ol":5346,"oi":587,"og":2271,"oh":382,"ot":3827,"os":3306,"ov":1152,"ou":2993,"op":4558,"oo":12667,"or":14221,"r ":19504,"ow":1144,"pe":3683,"pg":229,"pa":2371,"pl":1195,"lê":351,"po":1932,"ph":223,"pi":1008,"lo":3369,"lm":315,"ll":2990,"ls":2634,"lp":392,"lw":3
 11,"lv":239,"lu":1548,"lt":993,"ly":716,"o ":2083,"md":261,"ma":3853,"mb":2182,"mg":224,"me":9151,"mi":2940,"mm":802,"mp":1223,"mo":1485,"ië":1437,"mt":249,"ms":966,"mu":1085,"p ":4720,"na":6444,"nb":510,"nc":507,"nd":12581,"ne":5737,"nf":203,"ng":9804,"nh":460,"ni":6127,"nj":300,"nk":2057,"nl":616,"nm":203,"jo":532,"ki":2683,"kh":210,"kg":239,"ke":8584,"ka":6722,"m ":5913,"kw":457,"ky":282,"ks":2318,"kt":2084,"ku":1443,"ko":3908,"kr":2375,"kk":1579,"kl":2200,"km":469,"li":9515,"lh":279,"lk":1158,"lj":705,"le":10290,"ld":1944,"lg":1526,"lf":717,"la":8341,"lb":446,"n ":58065,"hr":313,"ht":702,"hu":1684,"hi":1067,"ho":3048,"dé":160,"id":5034,"ic":1058,"ib":451,"ia":2568,"ig":5540,"if":581,"ie":47836,"hy":348,"k ":9212,"ir":2359,"is":17403,"it":9361,"iu":405,"iv":1008,"iw":219,"ik":8953,"il":3774,"im":1386,"in":25004,"io":1984,"eë":1032,"ip":899,"je":609,"ji":572,"iz":156,"l ":8172,"ja":1960,"wy":994,"z ":242,"wi":1800,"wo":4179,"vy":166,"y ":4684,"wa":9856,"we":6959,"
 vl":1196,"vi":4040,"vu":178,"vr":662,"vo":4078,"uw":282,"uu":992,"ve":5906,"va":16173,"x ":845,"ui":7822,"uk":678,"ul":2052,"ue":905,"ug":1045,"ur":5410,"us":5098,"ut":907,"um":1711,"un":2596,"up":170,"ty":1434,"tu":2643,"tt":1277,"tw":1177,"tv":217,"ub":1182,"ua":728,"ud":950,"uc":160,"w ":232,"to":5433,"tm":201,"tl":667,"ts":3814,"tr":4026,"tg":532,"te":20430,"tk":279,"tj":177,"ti":5658,"th":1701,"tb":213,"ta":9118,"su":1177,"sv":424,"ss":2799,"st":17122,"sy":1309,"sw":531,"sl":1811,"sk":5006,"sn":242,"sm":693,"sp":2566,"oë":412,"so":3731,"sr":312,"sd":385,"sc":448,"sf":208,"se":15556,"sh":473,"sg":396,"sj":338,"si":8436,"u ":1834,"sa":2367,"sb":577,"rr":652,"rs":6262,"rt":4139,"ru":2543,"rv":1198,"rw":1199,"ry":2450,"rp":1265,"ro":8165,"rn":1586,"rm":2087,"rl":1734,"rk":2996,"ri":11752,"rh":614,"rg":2653,"rf":378,"re":10923,"rd":7372,"rc":234,"rb":955,"ra":7710,"t ":22731,"qu":168,"s ":35284,"px":614,"Hy ":529,"py":231,"pt":765,"pu":844,"pp":1058,"pr":3258,"ps":659,"w�
 �ª":320,"zi":170,"ze":169,"za":209,"yg":162,"ye":406,"yf":643,"yd":927,"yw":439,"ys":1141,"yn":1041,"yl":288,"yk":1145,"Apr":247,"Aug":272,"Afr":2048,"Ame":464,"Ber":218,"Bel":171,"Bre":163,"Bra":191,"Bri":282,"Des":273,"Daa":460,"Chr":224,"Cha":171,"ër":307,"ël":325,"êr":697,"ë ":1979,"ê ":310,"é ":228,"Dit":1028,"Die":4537,"Dui":918,"Ned":417,"Nas":187,"Nov":238,"Noo":595,"Okt":256,"Oli":158,"Oos":361,"Par":313,"Pro":177,"Pre":186,"SA ":161,"Ita":207,"Jan":348,"Joh":290,"Jul":297,"Jun":245,"Kaa":543,"Kan":220,"Kat":191,"Kar":171,"Ker":270,"Kon":276,"Lat":181,"Lit":162,"Mei":281,"Mar":370,"Maa":286,"Mon":210,"Mid":157,"Wil":165,"Wes":439,"Vry":192,"Vol":161,"êre":674,"Swe":193,"Sy ":252,"Sui":1515,"Sta":443,"Ste":208,"Sep":228,"Spa":253,"Rus":560,"Sch":162,"Rep":214,"Rom":176,"Ver":555,"Uni":236,"The":196,"Tur":159,"bin":400,"blo":205,"bli":525,"bla":215,"boe":246,"boo":276,"bor":587,"bou":330,"ban":283,"bal":289,"bai":191,"baa":372,"bas":270,"bar":272
 ,"beh":366,"beg":372,"bee":325,"bed":285,"ber":1916,"bel":540,"bek":1148,"bew":349,"bev":630,"bes":1308,"bet":510,"bie":1052,"ce ":276,"bri":159,"bro":237,"bra":211,"bre":258,"bru":1062,"bur":584,"by ":693,"am ":1182,"ake":292,"al ":2759,"ain":204,"ak ":856,"aie":241,"agt":446,"anu":467,"ann":632,"ant":1705,"ans":3841,"ane":404,"ang":1856,"ani":742,"anj":191,"ank":961,"ap ":635,"ana":788,"anc":195,"and":5528,"amm":186,"amp":480,"ami":512,"ame":657,"amb":236,"ama":204,"alt":231,"als":160,"all":667,"alk":171,"alg":320,"ali":1276,"ald":217,"ale":2352,"alf":209,"ala":367,"an ":18298,"aks":261,"akt":740,"akl":166,"abe":229,"abi":201,"aby":216,"ae ":624,"aag":175,"aad":172,"aak":679,"aai":350,"aan":6190,"aal":1515,"aam":1083,"aas":579,"aar":5293,"aap":567,"aat":1563,"ad ":2565,"afg":266,"ai ":311,"age":184,"afd":268,"adm":206,"adi":436,"ade":539,"ag ":1304,"ads":176,"ach":166,"ada":249,"af ":494,"at ":6755,"arg":256,"are":965,"ard":1124,"ara":390,"aro":332,"arn":185,"arm":157,"arl
 ":301,"ark":397,"ari":1177,"arv":249,"ars":463,"art":1494,"ary":171,"asi":1669,"ase":210,"aso":169,"ar ":3216,"apa":189,"app":418,"aps":269,"as ":5230,"awe":308,"awi":169,"ata":346,"ast":673,"ass":518,"ato":426,"ate":1382,"ati":871,"ats":404,"atu":409,"aty":167,"aus":156,"jaa":1087,"jar":470,"je ":175,"joe":306,"jin":161,"jie":306,"ito":170,"itt":191,"its":1623,"isk":182,"ism":266,"iss":374,"ist":1582,"ita":608,"ite":1331,"itg":386,"iti":469,"ius":176,"ium":203,"ivi":590,"ive":294,"is ":12546,"ion":1252,"eër":158,"ipa":265,"ir ":1648,"isi":1018,"ise":601,"isa":220,"ire":181,"it ":3772,"kil":644,"kie":536,"kin":914,"km ":266,"kgr":173,"kee":210,"kei":339,"kel":962,"ken":2090,"kep":166,"ker":1342,"ke ":3014,"kra":345,"kse":472,"kry":1085,"kri":662,"kou":249,"kor":369,"kop":214,"koo":391,"kon":866,"kom":903,"kol":246,"koe":157,"ks ":710,"kke":1272,"kki":178,"klu":430,"kle":511,"kla":387,"kli":749,"kat":157,"kar":183,"kas":204,"kap":818,"kan":1256,"kal":611,"kaa":1596,"ka ":1
 388," Ga":319," Ge":658," Fo":161," Fr":1217," Fi":213," Ha":534," He":680," Go":354," Gr":1318," Hy":549," Hu":294," Ho":502," II":202," Hi":301," Ja":710," Is":157," It":218," In":916,"han":779," Ka":1486,"hal":311," Ke":447,"haw":164," Ki":192,"har":356," Jo":563," Ju":622,"haa":238,"had":164," La":657," Le":488," Li":502," Ko":657," Kr":224," Ma":1348," Mi":547," Me":799,"he ":399," Lo":346," Lu":244," Ne":762," Na":662," Ni":257," Mo":624," Mu":186,"hel":273,"hei":994,"hee":465,"hed":169,"het":2911,"her":350,"hem":255," Ap":349," Am":563," An":491," Al":626," Af":2082," Ba":645," Au":486," At":187," As":200," Ar":422," Be":877,"hie":290," Bi":179," Bl":161," Bo":479," Br":777," Bu":243,"his":173," Ca":384," Ch":612," Co":473," Da":803," Di":5802," De":761," Do":196," Du":1024," El":212," En":720," Eu":354," Fe":367," Wo":179," Wi":530," We":720," Wa":412,"god":193,"gs ":887,"gor":522,"gro":2150,"gra":537,"gri":320,"gre":401," Os":236," Or":191," Oo":422," Po":674," Pi":
 229," Pe":309," Pa":725,"gst":406," No":1092," Ol":205," Ok":339,"gte":962,"gti":391," Ra":221," Ro":743," Re":620," Ri":222," Pr":547,"gus":284," Sy":292," Sw":400," Su":1700," St":953," Ta":273," Th":307," Te":261," Tr":236," To":270," Ry":194," Ru":645," Sa":724," Si":385," Sc":196," Se":811," So":678," Sp":441," Sk":201," Sl":222," Va":313," Ve":669," Vi":371," Vo":314," Vr":251," Tu":243," Un":253," ja":1102,"ial":357,"ian":256," in":12303,"iaa":736," is":11238," ka":1533," ki":531," ke":481,"id ":2425," ha":612," he":3438," gr":2075," go":365,"ia ":794," hy":292," hi":477," ho":1750," hu":727,"iet":320,"ieu":180,"iew":413," ni":722,"iel":277," ne":437,"ien":998," na":2339,"ier":2228,"ies":4471,"ied":1248,"ief":177,"iek":2103," mu":691,"ig ":1346," mo":667," om":1497," on":2106," of":1952,"ifi":218," no":1205," le":910," li":598," n ":10980," la":1290," ku":387,"ich":258,"ie ":34696," km":407," kl":879,"ica":209," kr":319," ko":1672," me":4100," mi":830,"ids":257," ma":
 1329," lu":186,"idi":291,"ide":993,"idd":457,"ida":156," lo":197," af":820," aa":2320," ad":269," am":322," an":759," ak":286," al":829," ar":263," at":229," as":2284," ba":599,"il ":459," bi":320," be":5430," bo":565," bl":263," by":612," bu":213," br":340,"ika":2950,"igd":381,"ige":1604,"igh":698,"igi":270,"igg":185,"igt":498,"igs":156,"ik ":2305," en":9738,"imp":231," ei":517," el":502,"ime":187," ek":223," ee":1730,"ind":1030,"ina":506," fa":191,"inn":302," fo":227,"int":638,"ins":1349,"ine":545,"ing":6095," fi":368,"ini":615,"ink":417," ge":8191," ga":169,"inw":455,"ikk":629," ch":185,"ike":1814,"ila":498," da":1923,"in ":12178,"iku":209,"iks":287," do":1111,"ilo":514,"ill":662," dr":523," de":3947,"ilj":228,"ili":684,"ild":294," di":25510,"imb":245,"eë ":693,"io ":196," du":309," wê":298,"hom":166,"hou":360,"hoo":1325,"hoe":410," wy":201,"hul":552,"hui":260,"hri":224,"ht ":578," ru":233," sa":888," se":2315," si":590," sl":329," sk":1250," sp":887," so":2211," ra
 ":237," re":1576," ri":825," ro":614," pr":1589," s ":207," px":614,"hy ":302," ou":447,"hum":674," oo":2639," op":2809," or":325," pe":402," pa":556," pl":641," po":737," lê":242," wa":7840," we":1395," wo":2888," wi":454," va":14670," ve":4043," vo":2359," vr":575," vi":2068," vl":594," ty":439," tw":582," tu":692," ui":1746," ta":895," sw":227," sy":1183," st":4293," su":859," tr":387," to":1857," th":729," ti":190," te":2715,"ffe":165,"fer":157,"fel":155,"fha":158,"fge":290,"fam":176,"fde":429,"eta":359,"ete":1299,"eti":372,"esp":358,"eso":210,"est":2951,"ess":405,"eun":234,"eto":320,"etr":438,"ets":217,"ett":493,"eve":456,"eva":262,"evo":907,"evi":274,"eur":2292,"eus":242,"ewi":337,"ewe":1704,"ewo":449,"ey ":181,"ewa":222,"epe":254,"er ":10617,"epa":228,"eor":221,"es ":4626,"ept":277,"epu":400,"epr":184,"erk":2067,"erl":875,"eri":1765,"erg":1022,"erh":416,"ere":1861,"erf":286,"erd":1514,"era":1470,"erb":529,"et ":6083,"esk":1018,"esl":228,"esi":976,"ese":3607,"eu ":3
 38,"erv":860,"erw":949,"err":349,"ert":1101,"ers":4583,"ern":1142,"erm":861,"erp":342,"ero":382,"ekg":155,"ekk":206,"eko":474,"eks":950,"ekt":701,"en ":13492,"ela":904,"eld":1199,"elf":322,"ele":2593,"eli":1906,"elj":427,"elg":226,"elk":209,"ell":778,"elo":234,"els":1983,"elt":333,"ely":255,"emb":839,"ema":484,"eme":1266,"emo":181,"emi":456,"ep ":699,"ene":1142,"enh":254,"eng":314,"enb":269,"ena":610,"end":3112,"eno":500,"enn":400,"enk":275,"eni":1151,"ens":2864,"ent":2318,"ego":497,"ege":690,"egi":516,"eha":370,"egr":238,"egs":217,"egt":193,"eho":266,"ehe":259,"ek ":1799,"eis":330,"eil":544,"ein":1010,"eie":633,"eid":1307,"el ":3516,"eit":680,"eke":2739,"eka":220,"em ":967,"gin":784,"gie":714,"ght":548,"gep":249,"gen":1564,"get":297,"ger":1248,"ges":2014,"gev":788,"gew":944,"gee":448,"ged":475,"geb":2499,"geh":356,"geg":181,"gem":756,"gel":1995,"gek":350,"gde":427,"ge ":1916,"gaa":266,"gan":539,"ga ":157,"fst":852,"fri":2089,"for":371,"fie":369,"fil":208,"fin":174,"fis":177
 ,"da ":327,"de ":6409,"daa":645,"dag":700,"dae":480,"dat":659,"dan":233,"dam":165,"dde":490,"ch ":316,"cha":160,"ck ":233,"che":490,"ed ":1090,"eba":159,"ebe":354,"ebi":752,"ebo":768,"ebr":1168,"ei ":821,"ega":168,"eek":631,"een":2520,"eel":2072,"eem":410,"eed":587,"ees":884,"eer":3295,"eeu":449,"eet":195,"edi":638,"ede":2561,"eda":161,"eg ":316,"eds":321,"edr":340,"ee ":892,"ef ":280,"dwe":310,"dus":171,"dor":875,"doo":416,"don":160,"dom":227,"ds ":353,"dmi":211,"doe":283,"dst":428,"dui":309,"dri":421,"dra":423,"dry":204,"dsk":181,"dse":527,"dia":294,"der":4829,"des":476,"deu":1676,"dee":1279,"del":1695,"dek":186,"den":1206,"do ":172,"din":875,"dio":177,"dis":425,"dit":656,"die":24964,"dig":1168,"dik":198,"rhe":301,"rga":496,"rgi":335,"rge":595,"ret":312,"res":944,"rg ":777,"rea":245,"ree":1091,"ref":257,"red":294,"rei":545,"reg":1039,"ren":1300,"rek":765,"rel":674,"rep":191,"rf ":180,"rdo":215,"rdi":841,"rde":1873,"re ":2607,"rd ":3667,"ras":532,"rat":587,"rbi":190,"rba":1
 60,"rbe":287,"rag":291,"ran":2011,"ram":317,"ral":832,"rak":247,"raa":1046,"raf":284,"rad":331,"rs ":1922,"ros":273,"rot":330,"rom":305,"ron":1072,"roo":1778,"rop":575,"rou":212,"rov":708,"rod":199,"rol":315,"roe":1277,"rog":195,"rno":196,"rp ":728,"rna":508,"rne":469,"rmo":164,"rma":539,"rme":324,"rmi":175,"rlo":320,"rli":409,"rle":270,"rla":508,"rks":184,"rko":248,"rki":199,"rkl":203,"rke":440,"rka":271,"rm ":692,"rio":174,"rit":493,"ris":571,"riv":501,"rig":863,"ril":278,"rik":3384,"rin":1384,"ria":924,"ric":236,"rie":2029,"rk ":1040,"rwe":410,"rwy":498,"ryf":393,"rui":1143,"rug":256,"rum":244,"ruk":231,"rus":225,"rva":502,"rvl":353,"rvo":192,"rwa":171,"ry ":383,"rsk":872,"rsi":432,"rso":249,"rsp":591,"rsa":225,"rse":478,"rta":186,"rst":1083,"rtk":160,"rto":274,"rte":620,"rti":334,"rua":209,"rty":351,"rt ":1413,"rre":272,"saa":540,"sal":170,"sam":303,"san":408,"sas":204,"sa ":155,"ryw":338,"rys":282,"ryk":576,"sge":305,"sie":4039,"sid":185,"sia":299,"sit":436,"sis":296,"s
 ip":279,"sin":541,"sio":799,"sil":194,"sim":173,"sik":231,"sif":160,"sig":289,"sbu":231,"se ":9840,"sch":268,"ser":501,"ses":400,"set":250,"seu":239,"sea":162,"see":618,"sed":264,"sen":1323,"sem":298,"sel":1093,"sek":186,"spo":405,"spr":756,"spe":934,"spa":260,"sow":508,"som":247,"son":545,"soo":954,"soe":195,"sok":377,"st ":267,"sli":202,"slu":297,"sky":183,"sla":1006,"sle":205,"ski":804,"sko":594,"skr":1152,"sku":244,"ska":1212,"ske":665,"sië":283,"sma":173,"sme":382,"sse":1275,"ssa":198,"ssi":922,"ste":6829,"sta":5065,"sto":805,"sti":1396,"stu":693,"str":1673,"sty":226,"sui":596,"sve":167,"sy ":1199,"swa":313,"tal":1301,"taa":2499,"tad":2323,"tau":165,"tat":456,"tas":164,"tan":1021,"te ":8469,"ta ":339,"pa ":202,"pe ":459,"par":608,"pas":176,"paa":333,"pal":324,"pan":428,"pge":207,"pen":295,"per":1379,"pes":438,"pee":201,"pel":568,"pla":660,"lê ":268,"pli":169,"ple":241,"pie":480,"por":394,"poo":160,"pos":197,"pol":518,"ppy":184,"ppe":636,"pst":229,"pub":435,"pte":5
 75,"pra":251,"pri":484,"pre":726,"pro":1677,"pun":246,"px ":614,"py ":166,"ra ":424,"ngo":161,"ngr":289,"ngs":1292,"nge":2327,"nhe":276,"nel":314,"nen":189,"nem":225,"ner":1014,"net":468,"nes":533,"ng ":4906,"nee":762,"nce":206,"ne ":1530,"ndr":216,"nds":657,"ndo":326,"ndi":878,"nde":5081,"nda":453,"nal":790,"nam":291,"nad":316,"naf":372,"nab":229,"naa":1198,"nd ":4245,"nat":282,"nas":677,"na ":1572,"nwo":542,"nus":209,"nua":266,"ntw":393,"nto":201,"nts":300,"ntr":543,"nti":571,"ntl":164,"nta":457,"nte":1815,"nst":787,"nse":3345,"nsi":1079,"nsl":207,"nsk":498,"nt ":1757,"ns ":2476,"nog":456,"noe":477,"noo":659,"nom":368,"nne":904,"nni":442,"nië":246,"nli":373,"nke":345,"nkl":391,"nks":179,"nkr":453,"nje":156,"nig":640,"nie":1831,"nk ":274,"niv":210,"nis":1512,"nin":804,"ogr":272,"ogi":423,"oi ":216,"oha":228,"oeë":178,"ok ":1432,"ol ":554,"ock":164,"oe ":303,"ode":551,"odi":176,"ods":177,"of ":2323,"oek":499,"oel":276,"oem":563,"oeg":231,"oei":336,"oer":752,"oes":295,"
 oet":302,"oen":602,"oep":714,"odu":188,"oed":477,"og ":895,"ofs":803,"oew":261,"od ":254,"obe":382,"oud":510,"oue":197,"ote":350,"ott":175,"ots":913,"oto":266,"ost":637,"osi":266,"ose":346,"oss":176,"oso":190,"owa":484,"owe":208,"ovi":678,"ove":370,"ous":302,"our":167,"out":306,"opo":205,"opp":449,"ope":438,"opg":213,"opa":195,"os ":1171,"oon":731,"ool":561,"oom":393,"ook":1376,"ooi":288,"oof":1146,"oog":389,"ood":288,"or ":1152,"oot":1351,"oos":958,"oor":4776,"oop":341,"ork":260,"orl":386,"orm":964,"orp":858,"ord":4583,"ore":773,"org":587,"ori":1212,"ou ":999,"ort":1219,"ors":871,"orw":195,"ot ":1528,"orb":186,"ora":235,"ola":171,"on ":1522,"oli":772,"oll":288,"olk":702,"ole":263,"olg":904,"ols":270,"olo":636,"om ":1870,"okk":553,"ona":980,"ond":1915,"one":1178,"ong":620,"oni":1012,"onl":220,"onk":232,"onn":184,"ono":391,"ons":511,"ont":1339,"oma":425,"ome":845,"omi":324,"omm":454,"omp":297,"oms":595,"op ":2264,"la ":334,"le ":3834,"lf ":175,"lde":601,"laa":982,"lad":180,"l
 ag":434,"lak":490,"lan":4154,"lar":155,"lat":361,"las":433,"ld ":695,"kus":410,"kun":548,"kul":242,"kwe":204,"kwa":191,"kte":822,"kst":257,"ksi":463,"ktr":342,"ktu":210,"kti":247,"kto":369,"ls ":1008,"lon":293,"lom":430,"loo":382,"loe":423,"log":655,"los":274,"lië":349,"lti":157,"lub":411,"lug":221,"lst":643,"lte":252,"lse":623,"lge":754,"lew":250,"leu":193,"les":329,"let":347,"ler":415,"lem":358,"len":1056,"lek":605,"lei":1010,"leg":257,"lee":477,"led":218,"lg ":483,"lo ":169,"lla":325,"lle":1578,"lli":615,"lke":200,"lki":447,"ljo":223,"ll ":176,"lja":430,"lit":831,"lis":504,"leë":449,"lin":1208,"lim":201,"lid":165,"lia":364,"lik":2917,"lig":818,"lie":1618,"ma ":226,"mb ":655,"maa":1244,"mag":221,"mar":331,"mas":207,"mal":270,"man":726,"mat":394,"mba":172,"mbi":179,"mbe":814,"mbo":161,"me ":936,"mde":163,"med":223,"mee":1533,"met":2981,"mes":247,"mer":991,"mel":330,"men":1550,"lui":390,"lus":194,"lwe":213,"lyk":221,"lyn":187,"mpi":220,"mpe":208,"mpo":176,"mpt":267,"ms
  ":488,"moe":196,"mod":233,"mon":329,"mst":248,"mus":488,"mun":417,"ër ":180,"mge":191,"min":806,"mil":465,"mit":231,"mig":184,"mie":523,"mid":310,"ië ":1136,"mme":353,"wêr":319,"yst":183,"ys ":680,"ywe":370,"ye ":306,"yf ":380,"yde":281,"yds":165,"yd ":230,"yn ":461,"yns":175,"yk ":810,"wys":531,"wor":2620,"woo":760,"won":526,"we ":1260,"wes":799,"wer":1583,"wet":305,"wen":427,"wel":545,"weg":270,"wee":1257,"wis":166,"wit":342,"wie":194,"win":417,"wil":177,"wik":231,"wan":300,"wat":5174,"war":532,"was":2236,"waa":1031,"vry":194,"vro":313,"vir":1570,"vin":921,"vie":880,"vis":289,"vla":709,"vlo":280,"voe":444,"vol":1592,"voo":1083,"vor":625,"ver":4566,"ven":170,"vem":236,"vel":250,"vee":302,"val":319,"van":14723,"vat":155,"vaa":414,"uwe":229,"uur":863,"usl":180,"usi":606,"use":380,"ust":585,"uss":1129,"ute":176,"uto":171,"us ":1998,"ure":395,"urg":669,"uri":191,"urk":167,"uro":352,"urs":211,"urt":189,"ur ":2547,"umb":689,"ume":172,"unt":325,"uns":289,"uni":820,"und":
 530,"um ":614,"ult":270,"ull":459,"uli":358,"un ":219,"uid":2285,"uik":850,"uim":162,"uis":508,"uk ":200,"uit":3378,"ul ":272,"ugb":161,"ugu":278,"ude":184,"udi":240,"ue ":322,"ug ":159,"ub ":406,"uar":522,"ubl":464,"ud ":181,"tyn":228,"ty ":384,"tur":232,"tus":988,"tuu":617,"tui":232,"tud":171,"tyd":628,"twi":269,"twe":751,"ts ":533,"tre":1022,"tra":1128,"tri":607,"tru":366,"tro":780,"tse":746,"tsk":298,"tsl":425,"tst":993,"tte":641,"tti":226,"to ":272,"tof":244,"toe":713,"tob":268,"tot":1108,"tom":182,"ton":586,"tol":317,"tor":808,"too":280,"til":187,"tik":334,"tie":1846,"tig":1053,"tis":241,"tin":826,"tio":267,"thu":695,"tkl":165,"tli":191,"tla":301,"tem":732,"ten":1059,"tei":844,"tek":528,"tel":2135,"tee":779,"teg":166,"ted":237,"th ":270,"teu":212,"tes":357,"ter":4231,"tge":442,"the":380},"n_words":[1541130,1808182,1328687],"name":"af"}
\ No newline at end of file