You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/20 12:20:15 UTC
svn commit: r1533872 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src: data/
java/org/apache/lucene/analysis/ko/ java/org/apache/lucene/analysis/ko/dic/
resources/org/apache/lucene/analysis/ko/dic/
test/org/apache/lucene/analysis/ko/
Author: rmuir
Date: Sun Oct 20 10:20:12 2013
New Revision: 1533872
URL: http://svn.apache.org/r1533872
Log:
LUCENE-4956: remove slow caseless match in trie, don't read headers as actual entries
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/wiki_results.txt
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
! See the License for the specific language governing permissions and
! limitations under the License.
!
-S:R:DBXX
+!S:R:DBXX
가건물:가,건물:0000
ê°ê²ë¬¸:ê°,ê²ë¬¸:0000
ê°ê²ì±:ê°,ê²ì±:0000
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
! See the License for the specific language governing permissions and
! limitations under the License.
!
-WORD,NVZDBIPSCC
+!WORD,NVZDBIPSCC
가,110000000X
가가,100000000X
가가호호,101000000X
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
! See the License for the specific language governing permissions and
! limitations under the License.
!
-EXTENSION,NVZDBI
+!EXTENSION,NVZDBI
미국,100000000X
영국,100000000X
프랑스,100000000X
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java Sun Oct 20 10:20:12 2013
@@ -95,8 +95,8 @@ public class KoreanAnalyzer extends Stop
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final KoreanTokenizer src = new KoreanTokenizer(reader);
src.setMaxTokenLength(maxTokenLength);
- TokenStream tok = new KoreanFilter(src, bigrammable, hasOrigin, exactMatch, originCNoun);
- tok = new LowerCaseFilter(matchVersion, tok);
+ TokenStream tok = new LowerCaseFilter(matchVersion, src);
+ tok = new KoreanFilter(tok, bigrammable, hasOrigin, exactMatch, originCNoun);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sun Oct 20 10:20:12 2013
@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.ko.uti
public class DictionaryUtil {
private DictionaryUtil() {}
- private static final Trie<String,WordEntry> dictionary = new Trie<String, WordEntry>(true);
+ private static final Trie<String,WordEntry> dictionary = new Trie<String, WordEntry>(false);
private static final Set<String> josas = new HashSet<String>();
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
! See the License for the specific language governing permissions and
! limitations under the License.
!
-S:R:DBXX
+!S:R:DBXX
가건물:가,건물:0000
ê°ê²ë¬¸:ê°,ê²ë¬¸:0000
ê°ê²ì±:ê°,ê²ì±:0000
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
! See the License for the specific language governing permissions and
! limitations under the License.
!
-WORD,NVZDBIPSCC
+!WORD,NVZDBIPSCC
가,110000000X
가가,100000000X
가가호호,101000000X
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic Sun Oct 20 10:20:12 2013
@@ -13,7 +13,7 @@
! See the License for the specific language governing permissions and
! limitations under the License.
!
-EXTENSION,NVZDBI
+!EXTENSION,NVZDBI
미국,100000000X
영국,100000000X
프랑스,100000000X
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/wiki_results.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/test/org/apache/lucene/analysis/ko/wiki_results.txt?rev=1533872&r1=1533871&r2=1533872&view=diff
==============================================================================
Binary files - no diff available.