You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/27 19:50:55 UTC

svn commit: r1536184 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko: dic/DictionaryUtil.java dic/HangulDictionary.java morph/EomiUtil.java morph/WordSpaceAnalyzer.java

Author: rmuir
Date: Sun Oct 27 18:50:54 2013
New Revision: 1536184

URL: http://svn.apache.org/r1536184
Log:
LUCENE-4956: replace some getWord != null with hasWord

Modified:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sun Oct 27 18:50:54 2013
@@ -83,6 +83,11 @@ public class DictionaryUtil {
     }
   }
   
+  /** true if this word exists */
+  public static boolean hasWord(CharSequence key) {
+    return dictionary.lookup(key) != null;
+  }
+  
   /** true if something with this prefix exists */
   public static boolean hasWordPrefix(CharSequence prefix) {
     return dictionary.hasPrefix(prefix);
@@ -98,78 +103,43 @@ public class DictionaryUtil {
     }
   }
   
-  /** Looks up noun, compound noun, or adverb */
-  public static WordEntry getWordExceptVerb(String key) {
+  /** returns word (or null) matching specified features */
+  private static WordEntry getWord(String key, int on, int off) {
     Byte clazz = dictionary.lookup(key);
     if (clazz == null) {
       return null;
     }
     char flags = dictionary.getFlags(clazz);
-    if ((flags & (WordEntry.NOUN | WordEntry.BUSA)) != 0) {
+    if ((flags & on) != 0 && (flags & off) == 0) {
       return new WordEntry(key, flags, clazz);
     } else {
       return null;
     }
   }
   
+  /** Looks up noun, compound noun, or adverb */
+  public static WordEntry getWordExceptVerb(String key) {
+    return getWord(key, WordEntry.NOUN | WordEntry.BUSA, 0);
+  }
+  
   /** Looks up a noun (but not compound noun) */
   public static WordEntry getNoun(String key) {
-    Byte clazz = dictionary.lookup(key);
-    if (clazz == null) {
-      return null;
-    }
-    char flags = dictionary.getFlags(clazz);
-    if ((flags & WordEntry.NOUN) != 0 && (flags & WordEntry.COMPOUND) == 0) {
-      return new WordEntry(key, flags, clazz);
-    } else {
-      return null;
-    }
+    return getWord(key, WordEntry.NOUN, WordEntry.COMPOUND);
   }
   
-  /**
-   * return all noun including compound noun
-   */
+  /** returns all nouns, including compound nouns */
   public static WordEntry getAllNoun(String key) {  
-    Byte clazz = dictionary.lookup(key);
-    if (clazz == null) {
-      return null;
-    }
-    char flags = dictionary.getFlags(clazz);
-    if ((flags & WordEntry.NOUN) != 0) {
-      return new WordEntry(key, flags, clazz);
-    } else {
-      return null;
-    }
+    return getWord(key, WordEntry.NOUN, 0);
   }
   
-  /**
-   * returns any verb
-   */
+  /** returns any verb */
   public static WordEntry getVerb(String key) {
-    Byte clazz = dictionary.lookup(key);
-    if (clazz == null) {
-      return null;
-    }
-    char flags = dictionary.getFlags(clazz);
-    if ((flags & WordEntry.VERB) != 0) {
-      return new WordEntry(key, flags, clazz);
-    } else {
-      return null;
-    }
+    return getWord(key, WordEntry.VERB, 0);
   }
   
   /** Looks up an adverb-only */
   public static WordEntry getBusa(String key) {
-    Byte clazz = dictionary.lookup(key);
-    if (clazz == null) {
-      return null;
-    }
-    char flags = dictionary.getFlags(clazz);
-    if ((flags & WordEntry.BUSA) != 0 && (flags & WordEntry.NOUN) == 0) {
-      return new WordEntry(key, flags, clazz);
-    } else {
-      return null;
-    }
+    return getWord(key, WordEntry.BUSA, WordEntry.NOUN);
   }
   
   /** return list of irregular compounds for word class. */

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java Sun Oct 27 18:50:54 2013
@@ -73,7 +73,7 @@ class HangulDictionary {
   }
   
   /** looks up word class for a word (exact match) */
-  Byte lookup(String key) {
+  Byte lookup(CharSequence key) {
     // TODO: why does this thing look up empty strings?
     if (key.length() == 0) {
       return null;

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java Sun Oct 27 18:50:54 2013
@@ -125,7 +125,9 @@ class EomiUtil {
 
     if(nChrs.length==2&&chrs[index]=='시'&&(chrs.length<=index+1||
         (chrs.length>index+1&&chrs[index+1]!='셨'))) {
-      if(DictionaryUtil.getWord(results[0])!=null) return results;  //'시'가 포함된 단어가 있다. 성가시다/도시다/들쑤시다 
+      if (DictionaryUtil.hasWord(results[0])) {
+        return results;  //'시'가 포함된 단어가 있다. 성가시다/도시다/들쑤시다 
+      }
       pomi = chrs[index]+pomi;  
       setPomiResult(results,stem.substring(0,index),pomi);      
       if(--index==0||chrs[index]!='으') return results; // 다음이거나 선어말어미가 없다면...        

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java Sun Oct 27 18:50:54 2013
@@ -115,7 +115,7 @@ public class WordSpaceAnalyzer {
     if(output.getLastEnd()<input.length()) {
       
       String source = input.substring(output.getLastEnd());
-      int score = DictionaryUtil.getWord(source)==null ? AnalysisOutput.SCORE_ANALYSIS : AnalysisOutput.SCORE_CORRECT;
+      int score = DictionaryUtil.hasWord(source) ? AnalysisOutput.SCORE_CORRECT : AnalysisOutput.SCORE_ANALYSIS;
       AnalysisOutput o =new AnalysisOutput(source,null,null,PatternConstants.POS_NOUN,
           PatternConstants.PTN_N,score);
       
@@ -325,7 +325,7 @@ public class WordSpaceAnalyzer {
           ) {
         eend--;
       }else if(pvword!=null&&candidates.get(0).getPatn()>=PatternConstants.PTN_VM&& // 명사 + 용언 어구 중에.. 용언어구로 단어를 이루는 경우는 없다.
-          candidates.get(0).getPatn()<=PatternConstants.PTN_VMXMJ && DictionaryUtil.getWord(input)!=null){
+          candidates.get(0).getPatn()<=PatternConstants.PTN_VMXMJ && DictionaryUtil.hasWord(input)){
         candidates.clear();
         break;
       }else if(pvword!=null&&VerbUtil.verbSuffix(candidates.get(0).getStem())
@@ -550,7 +550,7 @@ public class WordSpaceAnalyzer {
         
     for (int i = es; i < str.length(); i++) {
       if (SyllableFeatures.hasFeature(str.charAt(i), SyllableFeatures.JOSA1)) {       
-        return DictionaryUtil.getWord(str.substring(ws,i)) != null;
+        return DictionaryUtil.hasWord(str.substring(ws,i));
       }
     }