You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/01/04 17:15:34 UTC

svn commit: r1555360 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/ lucene/analysis/common/src/resources/META-INF/services/ lucene/analysis/common/src/resources/org/apach...

Author: rmuir
Date: Sat Jan  4 16:15:34 2014
New Revision: 1555360

URL: http://svn.apache.org/r1555360
Log:
LUCENE-5379: Kurdish Analyzer

Added:
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/
      - copied from r1555359, lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ckb/
      - copied from r1555359, lucene/dev/trunk/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ckb/
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/
      - copied from r1555359, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/
    lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/lang/stopwords_ckb.txt
      - copied unchanged from r1555359, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/stopwords_ckb.txt
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (contents, props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sat Jan  4 16:15:34 2014
@@ -26,6 +26,8 @@ New Features
   matter in practice if the number of ranges is over 10 or so.  (Mike
   McCandless)
 
+* LUCENE-5379: Add Analyzer for Kurdish.  (Robert Muir)
+
 Build
 
 * LUCENE-5217: Maven config: get dependencies from Ant+Ivy config; disable

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory Sat Jan  4 16:15:34 2014
@@ -20,6 +20,8 @@ org.apache.lucene.analysis.br.BrazilianS
 org.apache.lucene.analysis.cjk.CJKBigramFilterFactory
 org.apache.lucene.analysis.cjk.CJKWidthFilterFactory
 org.apache.lucene.analysis.cn.ChineseFilterFactory
+org.apache.lucene.analysis.ckb.SoraniNormalizationFilterFactory
+org.apache.lucene.analysis.ckb.SoraniStemFilterFactory
 org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory
 org.apache.lucene.analysis.commongrams.CommonGramsQueryFilterFactory
 org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilterFactory

Modified: lucene/dev/branches/branch_4x/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/build.xml?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/build.xml (original)
+++ lucene/dev/branches/branch_4x/solr/build.xml Sat Jan  4 16:15:34 2014
@@ -612,7 +612,7 @@
   
   <property name="analysis-common.res.dir"  value="../lucene/analysis/common/src/resources/org/apache/lucene/analysis"/>
   <property name="analysis-kuromoji.res.dir"  value="../lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis"/>
-  <property name="analysis.conf.dest" value="${example}/solr/conf/lang"/>
+  <property name="analysis.conf.dest" value="${example}/solr/collection1/conf/lang"/>
 
   <target name="sync-analyzers"
           description="Committers' Helper: synchronizes analysis resources (e.g. stoplists) to the example">
@@ -625,6 +625,9 @@
     <!-- catalan -->
     <copy verbose="true" file="${analysis-common.res.dir}/ca/stopwords.txt"
                          tofile="${analysis.conf.dest}/stopwords_ca.txt"/>
+    <!-- kurdish -->
+    <copy verbose="true" file="${analysis-common.res.dir}/ckb/stopwords.txt"
+                         tofile="${analysis.conf.dest}/stopwords_ckb.txt"/>
     <!-- czech -->
     <copy verbose="true" file="${analysis-common.res.dir}/cz/stopwords.txt"
                          tofile="${analysis.conf.dest}/stopwords_cz.txt"/>

Modified: lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml Sat Jan  4 16:15:34 2014
@@ -779,6 +779,18 @@
       </analyzer>
     </fieldType>
 
+    <!-- Kurdish -->
+    <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SoraniNormalizationFilterFactory"/>
+        <!-- for any latin text -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
+        <filter class="solr.SoraniStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
     <!-- Czech -->
     <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
       <analyzer>