You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/20 12:20:15 UTC

svn commit: r1533872 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang/src: data/ java/org/apache/lucene/analysis/ko/ java/org/apache/lucene/analysis/ko/dic/ resources/org/apache/lucene/analysis/ko/dic/ test/org/apache/lucene/analysis/ko/

Author: rmuir
Date: Sun Oct 20 10:20:12 2013
New Revision: 1533872

URL: http://svn.apache.org/r1533872
Log:
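LUCENE-4956: remove slow case-insensitive matching in the dictionary trie (tokens are now lowercased before KoreanFilter instead); prefix the .dic header lines with '!' so they are not read as actual entries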

Modified:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/wiki_results.txt

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
 !
-S:R:DBXX
+!S:R:DBXX
 가건물:가,건물:0000
 가게문:가,게문:0000
 가게채:가,게채:0000

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
 !
-WORD,NVZDBIPSCC
+!WORD,NVZDBIPSCC
 가,110000000X
 가가,100000000X
 가가호호,101000000X

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
 !
-EXTENSION,NVZDBI
+!EXTENSION,NVZDBI
 미국,100000000X
 영국,100000000X
 프랑스,100000000X

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java Sun Oct 20 10:20:12 2013
@@ -95,8 +95,8 @@ public class KoreanAnalyzer extends Stop
   protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
     final KoreanTokenizer src = new KoreanTokenizer(reader);
     src.setMaxTokenLength(maxTokenLength);
-    TokenStream tok = new KoreanFilter(src, bigrammable, hasOrigin, exactMatch, originCNoun);
-    tok = new LowerCaseFilter(matchVersion, tok);
+    TokenStream tok = new LowerCaseFilter(matchVersion, src);
+    tok = new KoreanFilter(tok, bigrammable, hasOrigin, exactMatch, originCNoun);
     tok = new StopFilter(matchVersion, tok, stopwords);
     return new TokenStreamComponents(src, tok) {
       @Override

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sun Oct 20 10:20:12 2013
@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.ko.uti
 public class DictionaryUtil {
   private DictionaryUtil() {}
   
-  private static final Trie<String,WordEntry> dictionary = new Trie<String, WordEntry>(true);
+  private static final Trie<String,WordEntry> dictionary = new Trie<String, WordEntry>(false);
   
   private static final Set<String> josas = new HashSet<String>();
   

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
 !
-S:R:DBXX
+!S:R:DBXX
 가건물:가,건물:0000
 가게문:가,게문:0000
 가게채:가,게채:0000

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
 !
-WORD,NVZDBIPSCC
+!WORD,NVZDBIPSCC
 가,110000000X
 가가,100000000X
 가가호호,101000000X

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
 !
-EXTENSION,NVZDBI
+!EXTENSION,NVZDBI
 미국,100000000X
 영국,100000000X
 프랑스,100000000X

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/wiki_results.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/wiki_results.txt?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
Binary files - no diff available.