You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 07:52:00 UTC

svn commit: r1534029 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko: ./ morph/

Author: rmuir
Date: Mon Oct 21 05:51:59 2013
New Revision: 1534029

URL: http://svn.apache.org/r1534029
Log:
LUCENE-4956: more morph cleanups

Modified:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java Mon Oct 21 05:51:59 2013
@@ -294,7 +294,6 @@ public final class KoreanFilter extends 
     for(int i=0;i<term.length();i++) {
 
       char[] chs = HanjaMapper.convertToHangul(term.charAt(i));      
-      if(chs==null) continue;
     
       List<StringBuilder> removeList = new ArrayList<StringBuilder>(); // 제거될 후보를 저장  
       

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java Mon Oct 21 05:51:59 2013
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ko.mo
 
 import java.util.Comparator;
 
-public class AnalysisOutputComparator<T> implements Comparator<T> {
+class AnalysisOutputComparator<T> implements Comparator<T> {
   public int compare(T o1, T o2) {
     
     AnalysisOutput out1 = (AnalysisOutput)o1;

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java Mon Oct 21 05:51:59 2013
@@ -23,12 +23,12 @@ package org.apache.lucene.analysis.ko.mo
 class ConstraintUtil {
   private ConstraintUtil() {}
   
-  public static boolean canHaheCompound(String key) {
+  static boolean canHaheCompound(String key) {
     return key.length() == 2 && ("민족".equals(key) || "동서".equals(key) || "남북".equals(key));
   }
   
   // 종성이 있는 음절과 연결될 수 없는 조사
-  public static boolean isTwoJosa(char josa) {
+  static boolean isTwoJosa(char josa) {
     switch (josa) {
       case '가':
       case '는':
@@ -49,7 +49,7 @@ class ConstraintUtil {
   }
   
   // 종성이 없는 음절과 연결될 수 없는 조사
-  public static boolean isThreeJosa(char josa) {
+  static boolean isThreeJosa(char josa) {
     switch (josa) {
       case '과':
       case '은':

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java Mon Oct 21 05:51:59 2013
@@ -34,7 +34,7 @@ class EomiUtil {
   /**
    * 선어말어미를 분석한다.
    */
-  public static String[] splitPomi(String stem)  {
+  static String[] splitPomi(String stem)  {
 
     //   results[0]:성공(1)/실패(0), results[1]: 어근, results[2]: 선어말어미
     String[] results = new String[2];  
@@ -171,7 +171,7 @@ class EomiUtil {
    * 4. 어미 '아/어'가 탈락되는 어절
    * 5. '아/어'의 변이체 분리
    */
-  public static String[] splitEomi(String stem, String end) {
+  static String[] splitEomi(String stem, String end) {
 
     String[] strs = new String[2];
     int strlen = stem.length();

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java Mon Oct 21 05:51:59 2013
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.ko.dic
 class IrregularUtil {
   private IrregularUtil() {}
   
-  public static String[] restoreIrregularVerb(String start, String end) {
+  static String[] restoreIrregularVerb(String start, String end) {
 
     if(end==null) end="";
     char[] jasos = new char[0];    

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java Mon Oct 21 05:51:59 2013
@@ -26,21 +26,6 @@ import org.apache.lucene.analysis.ko.dic
 import org.apache.lucene.analysis.ko.dic.SyllableFeatures;
 
 public class MorphAnalyzer {
-
-  /**
-   * starting word of sentence.
-   */
-  public static final int POS_START = 1;
-  
-  /**
-   * middle word of sentence
-   */
-  public static final int POS_MID = 2;
-  
-  /**
-   * ending word of sentence.
-   */
-  public static final int POS_END = 3;    
   
   private CompoundNounAnalyzer cnAnalyzer = new CompoundNounAnalyzer();  
   
@@ -52,21 +37,12 @@ public class MorphAnalyzer {
     cnAnalyzer.setExactMach(is);
   }
   
-  public List<AnalysisOutput> analyze(String input) {  
-
-    if(input.endsWith("."))  
-      return analyze(input.substring(0,input.length()-1), POS_END);
-    
-    return analyze(input, POS_MID);
-  }
-  
   /**
    * 
    * @param input input
-   * @param pos pos
    * @return candidates
    */
-  public List<AnalysisOutput> analyze(String input, int pos) {    
+  public List<AnalysisOutput> analyze(String input) {    
 
     List<AnalysisOutput> candidates = new ArrayList<AnalysisOutput>();        
     boolean isVerbOnly = MorphUtil.hasVerbOnly(input);
@@ -267,21 +243,20 @@ public class MorphAnalyzer {
     WordEntry entry;
     if((entry=DictionaryUtil.getWord(word))!=null) {
 
-      if(entry.getFeature(WordEntry.IDX_NOUN)!='1'&&
-          entry.getFeature(WordEntry.IDX_BUSA)=='1') {
+      if (entry.isCompoundNoun()) {
+        candidates.add(0,output);
+      } else if (entry.isNoun()) {
+        output.setScore(AnalysisOutput.SCORE_CORRECT);
+        candidates.add(0,output);
+      } else if (entry.isAdverb()) {
         AnalysisOutput busa = new AnalysisOutput(word, null, null, PatternConstants.PTN_AID);
         busa.setPos(PatternConstants.POS_ETC);
         
         busa.setScore(AnalysisOutput.SCORE_CORRECT);
         candidates.add(0,busa);    
-      }else if(entry.getFeature(WordEntry.IDX_NOUN)=='1') {
-        output.setScore(AnalysisOutput.SCORE_CORRECT);
-        candidates.add(0,output);
-      }else if(entry.getFeature(WordEntry.IDX_NOUN)=='2') {
-        candidates.add(0,output);
       }
       
-      if(entry.getFeature(WordEntry.IDX_VERB)!='1') return;
+      if(!entry.isVerb()) return;
     } else if(candidates.size()==0||!NounUtil.endsWith2Josa(word)) {
       output.setScore(AnalysisOutput.SCORE_ANALYSIS);
       candidates.add(0,output);
@@ -298,7 +273,7 @@ public class MorphAnalyzer {
    * @param end end
    * @param candidates  candidates
    */
-  public void analysisWithJosa(String stem, String end, List<AnalysisOutput> candidates) {
+  void analysisWithJosa(String stem, String end, List<AnalysisOutput> candidates) {
     if(stem==null||stem.length()==0) return;  
     
     char[] chrs = MorphUtil.decompose(stem.charAt(stem.length()-1));
@@ -314,7 +289,7 @@ public class MorphAnalyzer {
     WordEntry entry = DictionaryUtil.getWordExceptVerb(stem);
     if(entry!=null) {
       output.setScore(AnalysisOutput.SCORE_CORRECT);
-      if(entry.getFeature(WordEntry.IDX_NOUN)=='0'&&entry.getFeature(WordEntry.IDX_BUSA)=='1') {
+      if(!entry.isNoun() && entry.isAdverb()) {
         output.setPos(PatternConstants.POS_ETC);
         output.setPatn(PatternConstants.PTN_ADVJ);
       }
@@ -340,7 +315,7 @@ public class MorphAnalyzer {
    * @param end end
    * @param candidates  candidates
    */
-  public void analysisWithEomi(String stem, String end, List<AnalysisOutput> candidates) {
+  void analysisWithEomi(String stem, String end, List<AnalysisOutput> candidates) {
     
     String[] morphs = EomiUtil.splitEomi(stem, end);
     if(morphs[0]==null) return; // 어미가 사전에 등록되어 있지 않다면....
@@ -396,7 +371,7 @@ public class MorphAnalyzer {
    * 복합명사인지 여부는 단위명사가 모두 사전에 있는지 여부로 판단한다.
    * 단위명사는 2글자 이상 단어에서만 찾는다.
    */
-  public boolean confirmCNoun(AnalysisOutput o)  {
+  boolean confirmCNoun(AnalysisOutput o)  {
 
     if(o.getStem().length()<3) return false;
      
@@ -455,7 +430,7 @@ public class MorphAnalyzer {
     }else if(o.getPatn()==PatternConstants.PTN_NSM) {         
       if("내".equals(o.getVsfx())&&cnouns.get(cnouns.size()-1).getWord().length()!=1) {
         WordEntry entry = DictionaryUtil.getWord(cnouns.get(cnouns.size()-1).getWord());
-        if(entry!=null&&entry.getFeature(WordEntry.IDX_NE)=='0') return false;
+        if(entry!=null && !entry.hasNE()) return false;
 //      }else if("하".equals(o.getVsfx())&&cnouns.get(cnouns.size()-1).getWord().length()==1) { 
 //        // 짝사랑하다 와 같은 경우에 뒷글자가 1글자이면 제외
 //        return false;

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java Mon Oct 21 05:51:59 2013
@@ -48,7 +48,7 @@ class MorphUtil {
    * 한글 한글자를 초성/중성/종성의 배열로 만들어 반환한다.
    * @param c the character to be decomposed
    */
-  public static char[] decompose(char c) {
+  static char[] decompose(char c) {
     char[] result = null;
 
     if(c>0xD7A3||c<0xAC00) return new char[]{c};
@@ -70,18 +70,17 @@ class MorphUtil {
     return result;
   }  
   
-  public static char compound(int first, int middle, int last) {    
+  static char compound(int first, int middle, int last) {    
     return (char)(0xAC00 + first* JUNG_JONG + middle * JONGSEONG.length + last);
   }
   
-
-  public static char makeChar(char ch, int mdl, int last) {    
+  static char makeChar(char ch, int mdl, int last) {    
     ch -= 0xAC00;    
     int first = ch/JUNG_JONG;     
     return compound(first,mdl,last);
   }
   
-  public static char makeChar(char ch, int last) {
+  static char makeChar(char ch, int last) {
     ch -= 0xAC00;    
     int first = ch/JUNG_JONG;  
     ch = (char)(ch % JUNG_JONG);
@@ -90,30 +89,14 @@ class MorphUtil {
     return compound(first,middle,last);    
   }
   
-  public static char replaceJongsung(char dest, char source) {
+  static char replaceJongsung(char dest, char source) {
     source -= 0xAC00;    
     int last = source % JONGSEONG.length;
       
     return makeChar(dest,last);  
   }
-
-  /**
-   * 형태소 유형 출력을 위한 문자열을 생성한다.
-   * @param word  word to be printed
-   * @param type  the type of the input word
-   */
-  public static String buildTypeString(String word, char type) {
-    StringBuffer sb = new StringBuffer();
-    sb.append(word);
-    sb.append("(");
-    sb.append(type);
-    sb.append(")");
-    
-    return sb.toString();
-  }
-  
   
-  public static void buildPtnVM(AnalysisOutput output, List<AnalysisOutput> candidates) {
+  static void buildPtnVM(AnalysisOutput output, List<AnalysisOutput> candidates) {
     
     String end = output.getEomi();
     if(output.getPomi()!=null) end = output.getPomi();
@@ -134,7 +117,7 @@ class MorphUtil {
     
   }
 
-  public static boolean hasVerbOnly(String input) {
+  static boolean hasVerbOnly(String input) {
     for (int i = input.length()-1; i >=0; i--) {
       if (SyllableFeatures.hasFeature(input.charAt(i), SyllableFeatures.WDSURF)) {
         assert input.length() > i;

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java Mon Oct 21 05:51:59 2013
@@ -46,7 +46,7 @@ class NounUtil {
    * @param o the analyzed output
    * @param candidates  candidates
    */
-  public static boolean analysisMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  static boolean analysisMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
 
     int strlen = o.getStem().length();
        
@@ -94,7 +94,7 @@ class NounUtil {
    * @param o the analyzed output
    * @param candidates  candidates
    */
-  public static boolean analysisVMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  private static boolean analysisVMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
 
     String[] irrs =  IrregularUtil.restoreIrregularVerb(o.getStem(), o.getElist().get(0));
     if(irrs!=null) {
@@ -118,7 +118,7 @@ class NounUtil {
    * @param o the analyzed output
    * @param candidates  candidates
    */
-  public static boolean analysisVMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  private static boolean analysisVMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
   
     int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
 
@@ -162,7 +162,7 @@ class NounUtil {
    * @param o the analyzed output
    * @param candidates  candidates
    */
-  public static boolean analysisNSMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  private static boolean analysisNSMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
 
     int idxVbSfix = VerbUtil.endsWithVerbSuffix(o.getStem());        
     if(idxVbSfix==-1) return false;
@@ -175,10 +175,10 @@ class NounUtil {
     WordEntry entry = DictionaryUtil.getWordExceptVerb(o.getStem());
 
     if(entry!=null) {
-      if(entry.getFeature(WordEntry.IDX_NOUN)=='0') return false;
-      else if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
-      else if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
-      else if(o.getVsfx().equals("내")&&entry.getFeature(WordEntry.IDX_NE)!='1') return false;
+      if(!entry.isNoun()) return false;
+      else if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+      else if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;
+      else if(o.getVsfx().equals("내") && !entry.hasNE()) return false;
       o.setScore(AnalysisOutput.SCORE_CORRECT); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.      
     }else {
       o.setScore(AnalysisOutput.SCORE_ANALYSIS); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.
@@ -189,7 +189,7 @@ class NounUtil {
     return true;
   }         
      
-  public static boolean analysisNSMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  private static boolean analysisNSMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
 
     int idxVbSfix = VerbUtil.endsWithVerbSuffix(o.getStem());        
     if(idxVbSfix==-1) return false;
@@ -202,10 +202,10 @@ class NounUtil {
     WordEntry entry = DictionaryUtil.getWordExceptVerb(o.getStem());
 
     if(entry!=null) {
-      if(entry.getFeature(WordEntry.IDX_NOUN)=='0') return false;
-      else if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
-      else if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
-      else if(o.getVsfx().equals("내")&&entry.getFeature(WordEntry.IDX_NE)!='1') return false;
+      if(!entry.isNoun()) return false;
+      else if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+      else if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;
+      else if(o.getVsfx().equals("내") && !entry.hasNE()) return false;
       o.setScore(AnalysisOutput.SCORE_CORRECT); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.      
     }else {
       o.setScore(AnalysisOutput.SCORE_ANALYSIS); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.
@@ -219,7 +219,7 @@ class NounUtil {
   /*
      * 마지막 음절이 명사형 접미사(등,상..)인지 조사한다.
      */
-  public static boolean confirmDNoun(AnalysisOutput output) {
+  static boolean confirmDNoun(AnalysisOutput output) {
 
     int strlen = output.getStem().length();
     String d = output.getStem().substring(strlen-1);      
@@ -241,7 +241,7 @@ class NounUtil {
     return true;
   }
       
-  public static boolean endsWith2Josa(String input) {
+  static boolean endsWith2Josa(String input) {
     for (int i = input.length()-2; i > 0; i--) {
       String josa = input.substring(i);
 
@@ -254,10 +254,12 @@ class NounUtil {
     return false;
   }
       
-  public static double countFoundNouns(AnalysisOutput o) {
+  static double countFoundNouns(AnalysisOutput o) {
     int count = 0;
-    for(int i=0;i<o.getCNounList().size();i++) {
-      if(o.getCNounList().get(i).isExist()) count++;
+    for (CompoundEntry entry : o.getCNounList()) {
+      if (entry.isExist()) {
+        count++;
+      }
     }
     return (count*100)/o.getCNounList().size();
   }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java Mon Oct 21 05:51:59 2013
@@ -26,11 +26,11 @@ import org.apache.lucene.analysis.ko.dic
 class VerbUtil {
   private VerbUtil() {}
 
-  public static final Map<String, String> verbSuffix = new HashMap<String, String>();
+  private static final Map<String, String> verbSuffix = new HashMap<String, String>();
   
-  public static final Map<String, String> XVerb = new HashMap<String, String>();
+  private static final Map<String, String> XVerb = new HashMap<String, String>();
   
-  public static final Map<String, String> wiAbbrevs = new HashMap<String, String>();
+  private static final Map<String, String> wiAbbrevs = new HashMap<String, String>();
   
   static {
     String[] suffixs = {
@@ -47,7 +47,7 @@ class VerbUtil {
   /**
    * 어간이 용언화접미사로 끝나면 index 를 반환한다.  아니면 -1을 반환한다.
    */
-  public static int endsWithVerbSuffix(String stem) {
+  static int endsWithVerbSuffix(String stem) {
     int len = stem.length();
     if(len<2) return -1;
     int start = 2;
@@ -61,7 +61,7 @@ class VerbUtil {
   /**
    * 어간부에 보조용언 [하,되,오,내,주,지]가 있는지 조사한다.
    */
-  public static int endsWithXVerb(String stem) {
+  static int endsWithXVerb(String stem) {
     int len = stem.length();
     if(len<2) return -1;
     int start = 2;
@@ -72,16 +72,14 @@ class VerbUtil {
     return -1;
   }
    
-  public static boolean verbSuffix(String stem) {
-
+  static boolean verbSuffix(String stem) {
     return verbSuffix.get(stem)!=null;
-     
   }
    
   /**
    * 3. 학교에서이다 : 체언 + '에서/부터/에서부터' + '이' + 어미 (PTN_NJCM) <br>
    */
-  public static boolean ananlysisNJCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  static boolean ananlysisNJCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
  
     int strlen = o.getStem().length();
     boolean success = false;
@@ -119,7 +117,7 @@ class VerbUtil {
    * @param o  어미부와 어간부가 분리된 결과
    * @param candidates  candidates
    */
-  public static boolean ananlysisNSM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  static boolean ananlysisNSM(AnalysisOutput o, List<AnalysisOutput> candidates) {
 
     if(o.getStem().endsWith("스러우")) o.setStem(o.getStem().substring(0,o.getStem().length()-3)+"스럽");
 
@@ -132,7 +130,6 @@ class VerbUtil {
       o.setStem(o.getStem().substring(0,idxVbSfix));
       entry = DictionaryUtil.getAllNoun(o.getStem());
     } else { // 이 축약인 경우
-      if(entry==null) return false;
       o.setVsfx("이");
       o.setStem(o.getStem());
     }
@@ -141,10 +138,10 @@ class VerbUtil {
     o.setPos(PatternConstants.POS_NOUN);
         
     if(entry!=null) {
-      if(entry.getFeature(WordEntry.IDX_NOUN)=='0') return false;
-      else if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
-      else if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
-      else if(o.getVsfx().equals("내")&&entry.getFeature(WordEntry.IDX_NE)!='1') return false;
+      if(!entry.isNoun()) return false;
+      else if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+      else if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;
+      else if(o.getVsfx().equals("내") && !entry.hasNE()) return false;
       else if(o.getVsfx().equals("이")&&o.getEomi().equals("어")) return false;
       o.setScore(AnalysisOutput.SCORE_CORRECT); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.      
     }else {
@@ -157,7 +154,7 @@ class VerbUtil {
 
   }
    
-  public static boolean ananlysisNSMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  static boolean ananlysisNSMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
    
     int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
     if(idxXVerb==-1) return false;
@@ -187,8 +184,8 @@ class VerbUtil {
 //    }
     if(entry==null) return false;  
     
-    if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
-    if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;        
+    if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+    if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;        
     
     o.setScore(AnalysisOutput.SCORE_CORRECT);
     if(entry.isCompoundNoun()) {
@@ -200,7 +197,7 @@ class VerbUtil {
     return (o.getScore()==AnalysisOutput.SCORE_CORRECT);     
   }
    
-  public static boolean analysisVMCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  static boolean analysisVMCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
    
     int strlen = o.getStem().length();
      
@@ -248,7 +245,7 @@ class VerbUtil {
    * 6. 도와주다 : 용언 + '아/어' + 보조용언 + 어미 (PTN_VMXM)
    * 
    */
-  public static boolean analysisVMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+  static boolean analysisVMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
 
     int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
 

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java Mon Oct 21 05:51:59 2013
@@ -28,36 +28,28 @@ class WSOutput  implements Cloneable {
   
   private List<AnalysisOutput> phrases = new ArrayList<AnalysisOutput>();
   
-  public WSOutput() {
-    
-  }
-  
-  public WSOutput(AnalysisOutput o) {
-    addPhrase(o);
-  }
-  
-  public int getLastStart() {
+  int getLastStart() {
     return lastStart;
   }
 
-  public void setLastStart(int start) {
+  void setLastStart(int start) {
     this.lastStart = start;
   }
 
-  public int getLastEnd() {
+  int getLastEnd() {
     return lastEnd;
   }
 
-  public void setLastEnd(int end) {
+  void setLastEnd(int end) {
     this.lastStart = end;
   }
   
 
-  public List<AnalysisOutput> getPhrases() {
+  List<AnalysisOutput> getPhrases() {
     return phrases;
   }
 
-  public void removeLast() {
+  void removeLast() {
         
     if(this.phrases.size()==0) return;
     
@@ -81,7 +73,7 @@ class WSOutput  implements Cloneable {
     }
   }
   
-  public void addPhrase(AnalysisOutput o) {
+  void addPhrase(AnalysisOutput o) {
 
     this.lastStart = this.lastEnd;
     this.lastEnd += o.getSource().length();
@@ -92,7 +84,7 @@ class WSOutput  implements Cloneable {
       addCompounds(o);
   }
   
-  private void addCompounds(AnalysisOutput o) {
+  void addCompounds(AnalysisOutput o) {
     
     List<CompoundEntry> cnouns = o.getCNounList();
       
@@ -151,7 +143,7 @@ class WSOutput  implements Cloneable {
 
   }
   
-  public void setPhrases(List<AnalysisOutput> phrases) {
+  void setPhrases(List<AnalysisOutput> phrases) {
     this.phrases = phrases;
   }
   

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java Mon Oct 21 05:51:59 2013
@@ -22,13 +22,13 @@ import java.util.List;
 
 public class WordEntry {
 
-  static final int IDX_NOUN = 0;
-  static final int IDX_VERB = 1;
-  static final int IDX_BUSA = 2;
-  public static final int IDX_DOV = 3;
-  public static final int IDX_BEV = 4;
-  public static final int IDX_NE = 5;
-  static final int IDX_REGURA = 9;
+  private static final int IDX_NOUN = 0;
+  private static final int IDX_VERB = 1;
+  private static final int IDX_BUSA = 2;
+  private static final int IDX_DOV = 3;
+  private static final int IDX_BEV = 4;
+  private static final int IDX_NE = 5;
+  private static final int IDX_REGURA = 9;
   
   /** Irregular verb type (ㅂ-final) */
   public static final int VERB_TYPE_BIUP = 'B';
@@ -80,11 +80,7 @@ public class WordEntry {
   }
   
   public String getWord() {
-    return this.word;
-  }
-  
-  public char getFeature(int index) {
-    return features[index];
+    return word;
   }
   
   /** Returns true if the entry is a noun (or compound noun) */
@@ -117,4 +113,19 @@ public class WordEntry {
   public boolean isAdverb() {
     return features[IDX_BUSA] == '1';
   }
+  
+  /** allows noun analysis with -하 verb suffix */
+  public boolean hasDOV() {
+    return features[IDX_DOV] == '1';
+  }
+  
+  /** allows noun analysis with -되 verb suffix */
+  public boolean hasBEV() {
+    return features[IDX_BEV] == '1';
+  }
+  
+  /** allows noun analysis with -내 verb suffix */
+  public boolean hasNE() {
+    return features[IDX_NE] == '1';
+  }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java Mon Oct 21 05:51:59 2013
@@ -132,12 +132,10 @@ public class WordSpaceAnalyzer {
    * @return  calculated score
    */
   public int getOutputScore(List<AnalysisOutput> list) {
-    
     int score = 100;
-    for(AnalysisOutput o : list) {
-      if(o.getScore()<score) score = o.getScore();
+    for (AnalysisOutput o : list) {
+      score = Math.min(score, o.getScore());
     }
-    
     return score;
   }
   
@@ -499,7 +497,7 @@ public class WordSpaceAnalyzer {
     
     int ptn = PatternConstants.PTN_N;
     
-    if(entry.getFeature(WordEntry.IDX_NOUN)=='0') {
+    if(!entry.isNoun()) {
       pos = PatternConstants.POS_AID;
       ptn = PatternConstants.PTN_AID;
     }