You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/01/04 17:15:34 UTC
svn commit: r1555360 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/
lucene/analysis/common/src/resources/META-INF/services/
lucene/analysis/common/src/resources/org/apach...
Author: rmuir
Date: Sat Jan 4 16:15:34 2014
New Revision: 1555360
URL: http://svn.apache.org/r1555360
Log:
LUCENE-5379: Kurdish Analyzer
Added:
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/
- copied from r1555359, lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/
lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ckb/
- copied from r1555359, lucene/dev/trunk/lucene/analysis/common/src/resources/org/apache/lucene/analysis/ckb/
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/
- copied from r1555359, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/
lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/lang/stopwords_ckb.txt
- copied unchanged from r1555359, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/stopwords_ckb.txt
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/build.xml (contents, props changed)
lucene/dev/branches/branch_4x/solr/example/ (props changed)
lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sat Jan 4 16:15:34 2014
@@ -26,6 +26,8 @@ New Features
matter in practice if the number of ranges is over 10 or so. (Mike
McCandless)
+* LUCENE-5379: Add Analyzer for Kurdish. (Robert Muir)
+
Build
* LUCENE-5217: Maven config: get dependencies from Ant+Ivy config; disable
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory Sat Jan 4 16:15:34 2014
@@ -20,6 +20,8 @@ org.apache.lucene.analysis.br.BrazilianS
org.apache.lucene.analysis.cjk.CJKBigramFilterFactory
org.apache.lucene.analysis.cjk.CJKWidthFilterFactory
org.apache.lucene.analysis.cn.ChineseFilterFactory
+org.apache.lucene.analysis.ckb.SoraniNormalizationFilterFactory
+org.apache.lucene.analysis.ckb.SoraniStemFilterFactory
org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory
org.apache.lucene.analysis.commongrams.CommonGramsQueryFilterFactory
org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilterFactory
Modified: lucene/dev/branches/branch_4x/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/build.xml?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/build.xml (original)
+++ lucene/dev/branches/branch_4x/solr/build.xml Sat Jan 4 16:15:34 2014
@@ -612,7 +612,7 @@
<property name="analysis-common.res.dir" value="../lucene/analysis/common/src/resources/org/apache/lucene/analysis"/>
<property name="analysis-kuromoji.res.dir" value="../lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis"/>
- <property name="analysis.conf.dest" value="${example}/solr/conf/lang"/>
+ <property name="analysis.conf.dest" value="${example}/solr/collection1/conf/lang"/>
<target name="sync-analyzers"
description="Committers' Helper: synchronizes analysis resources (e.g. stoplists) to the example">
@@ -625,6 +625,9 @@
<!-- catalan -->
<copy verbose="true" file="${analysis-common.res.dir}/ca/stopwords.txt"
tofile="${analysis.conf.dest}/stopwords_ca.txt"/>
+ <!-- kurdish -->
+ <copy verbose="true" file="${analysis-common.res.dir}/ckb/stopwords.txt"
+ tofile="${analysis.conf.dest}/stopwords_ckb.txt"/>
<!-- czech -->
<copy verbose="true" file="${analysis-common.res.dir}/cz/stopwords.txt"
tofile="${analysis.conf.dest}/stopwords_cz.txt"/>
Modified: lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml?rev=1555360&r1=1555359&r2=1555360&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/schema.xml Sat Jan 4 16:15:34 2014
@@ -779,6 +779,18 @@
</analyzer>
</fieldType>
+ <!-- Kurdish -->
+ <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SoraniNormalizationFilterFactory"/>
+ <!-- for any latin text -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
+ <filter class="solr.SoraniStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
<!-- Czech -->
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer>