You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/27 19:15:44 UTC

svn commit: r1536174 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko: dic/DictionaryUtil.java morph/EomiUtil.java morph/NounUtil.java

Author: rmuir
Date: Sun Oct 27 18:15:44 2013
New Revision: 1536174

URL: http://svn.apache.org/r1536174
Log:
LUCENE-4956: move this out of dictionaryutil

Modified:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1536174&r1=1536173&r2=1536174&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sun Oct 27 18:15:44 2013
@@ -203,25 +203,6 @@ public class DictionaryUtil {
     return suffixs.contains(str);
   }
   
-  /**
-   * ㄴ,ㄹ,ㅁ,ㅂ과 eomi 가 결합하여 어미가 될 수 있는지 점검한다.
-   */
-  public static String combineAndEomiCheck(char s, String eomi) {
-  
-    if(eomi==null) eomi="";
-
-    if(s=='ㄴ') eomi = "은"+eomi;
-    else if(s=='ㄹ') eomi = "을"+eomi;
-    else if(s=='ㅁ') eomi = "음"+eomi;
-    else if(s=='ㅂ') eomi = "습"+eomi;
-    else eomi = s+eomi;
-
-    if(existEomi(eomi)) return eomi;    
-
-    return null;
-    
-  }
-  
   private static void readFileToSet(final Set<String> set, String dic) throws IOException {    
     DictionaryResources.readLines(dic, new LineProcessor() {
       @Override

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java?rev=1536174&r1=1536173&r2=1536174&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java Sun Oct 27 18:15:44 2013
@@ -184,7 +184,7 @@ class EomiUtil {
     if((chrs.length==3)
         &&(chrs[2]=='ㄴ'||chrs[2]=='ㄹ'||chrs[2]=='ㅁ'||chrs[2]=='ㅂ')
         &&EomiUtil.IsNLMBSyl(estem,chrs[2])
-        && DictionaryUtil.combineAndEomiCheck(chrs[2], end)!=null) 
+        && combineAndEomiCheck(chrs[2], end)!=null) 
     {    
       strs[1] = Character.toString(chrs[2]);
       if(end.length()>0) strs[1] += end;
@@ -192,7 +192,7 @@ class EomiUtil {
    	  strs[0] = stem.substring(0,strlen-1) + MorphUtil.makeChar(estem, 0);  
     } 
     else if(chrs.length==3 && chrs[2]=='ㄹ' && DictionaryUtil.getVerb(stem)!=null 
-    		&& DictionaryUtil.combineAndEomiCheck(chrs[2], end)!=null) 
+    		&& combineAndEomiCheck(chrs[2], end)!=null) 
     {
         strs[1] = Character.toString(chrs[2]);
         if(end.length()>0) strs[1] += end;
@@ -211,27 +211,27 @@ class EomiUtil {
     else if(chrs[0]!='ㅇ'&&
         (chrs[1]=='ㅏ'||chrs[1]=='ㅓ'||chrs[1]=='ㅔ'||chrs[1]=='ㅐ')&&
         (chrs.length==2 || SyllableFeatures.hasFeature(estem, SyllableFeatures.YNPAH)) &&
-        (DictionaryUtil.combineAndEomiCheck('어', end)!=null)) 
+        (combineAndEomiCheck('어', end)!=null)) 
     {        
       strs[0] = stem;
       if(chrs.length==2) strs[1] = "어"+end;  
       else strs[1] = end;    
     } 
     else if(estem=='하'&&end!=null&&end.startsWith("여")&&
-        DictionaryUtil.combineAndEomiCheck('어', end.substring(1))!=null) 
+        combineAndEomiCheck('어', end.substring(1))!=null) 
     {      
       strs[0] = stem;
       strs[1] = "어"+end.substring(1);  
     }
     else if(estem=='려'&&end!=null&& // 꺼려=>꺼리어, 꺼려서=>꺼리어서
-          DictionaryUtil.combineAndEomiCheck('어', end)!=null) 
+          combineAndEomiCheck('어', end)!=null) 
     {      
         strs[0] = stem.substring(0,stem.length()-1)+"리";
         strs[1] = "어"+end;        
     }
     else if((chrs.length==2)&&
         (chrs[1]=='ㅘ'||chrs[1]=='ㅙ'||chrs[1]=='ㅝ'||chrs[1]=='ㅕ'||chrs[1]=='ㅐ'||chrs[1]=='ㅒ')&&
-        (DictionaryUtil.combineAndEomiCheck('어', end)!=null)) 
+        (combineAndEomiCheck('어', end)!=null)) 
     {    
   
       StringBuffer sb = new StringBuffer();
@@ -265,4 +265,32 @@ class EomiUtil {
     
     return strs;
   }
+  
+  /**
+   * Checks whether s combined with eomi forms a known ending: prepends 은/을/음/습 for ㄴ/ㄹ/ㅁ/ㅂ (otherwise s itself) and returns the combined string if DictionaryUtil.existEomi accepts it, else null.
+   */
+  private static String combineAndEomiCheck(char s, String eomi) {
+  
+    if (eomi == null) { // a null eomi is treated as the empty string
+      eomi = "";
+    }
+    
+    switch(s) {
+      case 'ㄴ': eomi = "은" + eomi;
+               break;
+      case 'ㄹ': eomi = "을" + eomi;
+               break;
+      case 'ㅁ': eomi = "음" + eomi;
+               break;
+      case 'ㅂ': eomi = "습" + eomi;
+               break;
+      default: eomi = s + eomi; // any other character is prepended unchanged
+    }
+
+    if (DictionaryUtil.existEomi(eomi)) {
+      return eomi;    
+    } else {
+      return null;
+    }
+  }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java?rev=1536174&r1=1536173&r2=1536174&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java Sun Oct 27 18:15:44 2013
@@ -27,19 +27,6 @@ import org.apache.lucene.analysis.ko.dic
 
 class NounUtil {
   private NounUtil() {}
-
-  private static boolean isDNoun(char ch) {
-    switch(ch) {
-      case '등':
-      case '들':
-      case '상':
-      case '간':
-      case '뿐':
-      case '별':
-      case '적': return true;
-      default: return false;
-    }
-  }
     
   /**
    * 
@@ -217,21 +204,42 @@ class NounUtil {
       
     return true;
   }
+  
+  private static boolean isDNoun(char ch) { // true only when ch is one of the seven syllables listed below; confirmDNoun uses it to test a stem's last character
+    switch(ch) {
+      case '등':
+      case '들':
+      case '상':
+      case '간':
+      case '뿐':
+      case '별':
+      case '적': return true; // every case label above falls through to this return
+      default: return false;
+    }
+  }
     
   /*
      * 마지막 음절이 명사형 접미사(등,상..)인지 조사한다.
      */
   static boolean confirmDNoun(AnalysisOutput output) {
-
-    int strlen = output.getStem().length();
-    String d = output.getStem().substring(strlen-1);      
-    if(d.length() != 1 || !isDNoun(d.charAt(0))) return false;
-
-    String s = output.getStem().substring(0, strlen-1);
-    output.setNsfx(d);
-    output.setStem(s);
+    final String currentStem = output.getStem();
+    // empty or single character
+    if (currentStem.length() <= 1) {
+      return false;
+    }
+    
+    // check suffix char
+    final char suffix = currentStem.charAt(currentStem.length()-1);
+    if (!isDNoun(suffix)) {
+      return false;
+    }
+    
+    // remove suffix
+    String stem = currentStem.substring(0, currentStem.length()-1);
+    output.setNsfx(Character.toString(suffix));
+    output.setStem(stem);
           
-    WordEntry cnoun = DictionaryUtil.getAllNoun(s);
+    WordEntry cnoun = DictionaryUtil.getAllNoun(stem);
     if(cnoun != null)  {
       if(cnoun.isCompoundNoun())
         output.setCNoun(cnoun.getCompounds());