You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/18 19:20:27 UTC

svn commit: r1533562 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko: morph/ utils/

Author: rmuir
Date: Fri Oct 18 17:20:26 2013
New Revision: 1533562

URL: http://svn.apache.org/r1533562
Log:
LUCENE-4956: remove some dead code

Removed:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AbbrevAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounProperty.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/SpaceOutput.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/Status.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSAOutput.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSCandidateComparator.java
Modified:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java Fri Oct 18 17:20:26 2013
@@ -347,46 +347,6 @@ public class CompoundNounAnalyzer {
     return maxlen;
   }
   
-  private int evaluation(List<CompoundEntry> candidates) {
-    
-    int eval = 10;
-    
-    int one = 0;
-    int exist = 0;    
-    
-    for(CompoundEntry entry : candidates) {
-      if(entry.getWord().length()==1) one++;
-      if(entry.isExist()) exist++;
-    }
-    
-    if(one>3) return eval;
-    
-    eval = eval + (exist*100)/candidates.size() - (one*100)/candidates.size();
-    
-    return eval;
-  }
-  
-  private boolean containWord(String before, String input, int pos) {
-    
-    String prev = null;
-    for(int i=pos;i<input.length();i++) {
-      
-      String text = before+input.substring(pos,i+1);    
-      if(DictionaryUtil.findWithPrefix(text).hasNext()) {
-        prev = text;
-        continue;
-      }
-      
-      if(prev!=null&&DictionaryUtil.getNoun(prev)!=null) return true;
-      
-      break;
-    }
-
-    return false;
-    
-  }
-  
- 
   private CompoundEntry[] analysisBySplited(int[] units, String input, boolean isFirst) {
   
     CompoundEntry[] entries = new CompoundEntry[units.length];
@@ -410,18 +370,6 @@ public class CompoundNounAnalyzer {
     
   }
   
-  private boolean canCompound(CompoundEntry[] entries, int thredhold) {
-    
-    int achived = 0;
-    for(int i=0;i<entries.length;i++) {
-      if(entries[i].isExist()) achived += score;
-    }
-  
-    if(achived<thredhold) return false;
-    
-    return true;
-  }
-  
   /**
    * 입력된 String 을 CompoundEntry 로 변환
    * @param input input

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java Fri Oct 18 17:20:26 2013
@@ -394,18 +394,6 @@ public class MorphAnalyzer {
     VerbUtil.analysisVMXM(o.clone(), candidates);
   }    
   
-  public void analysisCNoun(List<AnalysisOutput> candidates) {
-    
-    boolean success = false;
-    for(AnalysisOutput o: candidates) {
-      if(o.getPos()!=PatternConstants.POS_NOUN) continue;
-      if(o.getScore()==AnalysisOutput.SCORE_CORRECT) 
-        success=true;
-      else if(!success)
-        confirmCNoun(o);
-    }
-  }
-  
   /**
    * 복합명사인지 조사하고, 복합명사이면 단위명사들을 찾는다.
    * 복합명사인지 여부는 단위명사가 모두 사전에 있는지 여부로 판단한다.

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java Fri Oct 18 17:20:26 2013
@@ -101,49 +101,6 @@ public class ConstraintUtil {
     if(hahes.get(key)!=null) return true;
     return false;
   }
-    
-  /**
-   * 어미가 ㄴ,ㄹ,ㅁ 으로 끝나는지 조사한다.
-   */
-  public static boolean isNLM(String eomi) {
-    
-    if(eomi==null || "".equals(eomi)) return false;
-    
-    if(eomiPnouns.get(eomi)!=null) return true;
-    
-    char[] chrs = MorphUtil.decompose(eomi.charAt(eomi.length()-1));
-    if(chrs.length==3  && eomiPnouns.get(Character.toString(chrs[2]))!=null) return true;
-    
-    return true;
-  }
-  
-  public static boolean isEomiPhrase(int ptn) {
-    
-    if(PTN_MLIST.get(ptn)!=null) return true;
-    
-    return false;
-  }
-  
-  public static boolean isJosaNounPhrase(int ptn) {
-    
-    if(PTN_JLIST.get(ptn)!=null) return true;
-    
-    return false;
-  }
-  
-  public static boolean isJosaAdvPhrase(int ptn) {
-    
-    if(PatternConstants.PTN_ADVJ==ptn) return true;
-    
-    return false;
-  }
-  
-  public static boolean isAdvPhrase(int ptn) {
-    
-    if(PatternConstants.PTN_ADVJ==ptn || PatternConstants.PTN_AID==ptn) return true;
-    
-    return false;
-  }
   
   public static boolean isTwoJosa(String josa) {
     

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java Fri Oct 18 17:20:26 2013
@@ -17,12 +17,7 @@ package org.apache.lucene.analysis.ko.ut
  * limitations under the License.
  */
 
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
-import org.apache.lucene.analysis.ko.morph.AnalysisOutput;
-import org.apache.lucene.analysis.ko.morph.PatternConstants;
 
 public class EomiUtil {
   private EomiUtil() {}
@@ -36,91 +31,6 @@ public class EomiUtil {
   };
   
   /**
-   * 가장 길이가 긴 어미를 분리한다.
-   * @param term  term
-   */
-  public static String[] longestEomi(String term)  {
-    
-    String[] result = new String[2];
-    result[0] = term;
-    
-    String stem;
-    String eomi;
-    char[] efeature;
-    
-    for(int i=term.length();i>0;i--) {
-      
-      stem = term.substring(0,i);      
-    
-      if(i!=term.length()) {
-        eomi = term.substring(i);
-        efeature  = SyllableUtil.getFeature(eomi.charAt(0));        
-      } else {
-        efeature = SyllableUtil.getFeature(stem.charAt(i-1));
-        eomi="";
-      }
-
-      if(SyllableUtil.isAlpanumeric(stem.charAt(i-1))) break;
-      
-      char[] jasos = MorphUtil.decompose(stem.charAt(i-1));
-  
-      if(!"".equals(eomi)&&!DictionaryUtil.existEomi(eomi)) {
-        // do not anything.
-      } else if(jasos.length>2&&
-          (jasos[2]=='ㄴ'||jasos[2]=='ㄹ'||jasos[2]=='ㅁ'||jasos[2]=='ㅂ')&&
-          DictionaryUtil.combineAndEomiCheck(jasos[2], eomi)!=null) {
-        result[0] = Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 0));
-        if(i!=0) result[0] = stem.substring(0,i-1)+result[0];
-        result[1] = Character.toString(jasos[2]);
-      }else if(i>0&&(stem.endsWith("하")&&"여".equals(eomi))||
-          (stem.endsWith("가")&&"거라".equals(eomi))||
-          (stem.endsWith("오")&&"너라".equals(eomi))) {
-        result[0] = stem;
-        result[1] = eomi;      
-      }else if(jasos.length==2&&(!stem.endsWith("아")&&!stem.endsWith("어"))&&
-          (jasos[1]=='ㅏ'||jasos[1]=='ㅓ'||jasos[1]=='ㅔ'||jasos[1]=='ㅐ')&&
-          (DictionaryUtil.combineAndEomiCheck('어', eomi)!=null)) {    
-        char[] chs = MorphUtil.decompose(stem.charAt(stem.length()-1));        
-        result[0] = stem;
-        result[1] = "어"+eomi;
-      }else if((jasos[1]=='ㅘ'||jasos[1]=='ㅝ'||jasos[1]=='ㅕ'||jasos[1]=='ㅐ'||jasos[1]=='ㅒ')&&
-          (DictionaryUtil.combineAndEomiCheck('어', eomi)!=null)) {        
-        String end = "";        
-        if(jasos[1]=='ㅘ')
-          end=MorphUtil.makeChar(stem.charAt(i-1), 8, 0)+"아";  
-        else if(jasos[1]=='ㅝ')
-          end=MorphUtil.makeChar(stem.charAt(i-1), 13, 0)+"어";  
-        else if(jasos[1]=='ㅕ')
-          end=Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 6, 0));
-        else if(jasos[1]=='ㅐ')
-          end=MorphUtil.makeChar(stem.charAt(i-1), 0, 0)+"어";  
-        else if(jasos[1]=='ㅒ')
-          end=MorphUtil.makeChar(stem.charAt(i-1), 20, 0)+"애";                    
-        
-        if(jasos.length==3) {          
-          end = end.substring(0,end.length()-1)+MorphUtil.replaceJongsung(end.charAt(end.length()-1),stem.charAt(i-1));
-        }
-        
-        if(stem.length()<2) result[0] = end;
-        else result[0] = stem.substring(0,stem.length()-1)+end;
-        result[1] = eomi;  
-        
-      }else if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI1]!='0'&&
-          DictionaryUtil.existEomi(eomi)) {
-        if(!(((jasos.length==2&&jasos[0]=='ㄹ')||(jasos.length==3&&jasos[2]=='ㄹ'))&&eomi.equals("러"))) { // ㄹ 불규칙은 예외
-          result[0] = stem;
-          result[1] = eomi;
-        }
-      }
-
-      if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI2]=='0') break;
-    }  
-
-    return result;
-    
-  }  
-  
-  /**
    * 선어말어미를 분석한다.
    */
   public static String[] splitPomi(String stem)  {
@@ -230,311 +140,6 @@ public class EomiUtil {
   
     return results;
   }
-  
-  /**
-   * 불규칙 용언의 원형을 구한다.
-   */
-  public static List<AnalysisOutput> irregular(AnalysisOutput output) {
-    
-    List<AnalysisOutput> results = new ArrayList<AnalysisOutput>();
-  
-    if(output.getStem()==null||output.getStem().length()==0) 
-      return results;    
-    
-    String ending = output.getEomi();
-    if(output.getPomi()!=null) ending = output.getPomi();
-    
-    List<String[]> irrs = new ArrayList<String[]>();
-    
-    irregularStem(irrs,output.getStem(),ending);
-    irregularEnding(irrs,output.getStem(),ending);
-    irregularAO(irrs,output.getStem(),ending);
-
-    for(String[] irr: irrs) {
-      AnalysisOutput result = output.clone();
-      result.setStem(irr[0]);
-      if(output.getPatn()==PatternConstants.PTN_VM) {
-        if(output.getPomi()==null) result.setEomi(irr[1]);
-        else result.setPomi(irr[1]);
-      }  
-      results.add(result);
-    }
-        
-    return results;
-    
-  }
-  
-  /**
-   * 어간만 변하는 경우
-   * @param results results
-   * @param stem  stem
-   * @param ending  ending
-   */
-  private static void irregularStem(List<String[]> results, String stem, String ending) {  
-
-    char feCh = ending.charAt(0);
-    char[] fechJaso =  MorphUtil.decompose(feCh);
-    char ls = stem.charAt(stem.length()-1);
-    char[] lsJaso = MorphUtil.decompose(ls);
-  
-    if(feCh=='아'||feCh=='어'||feCh=='으') {
-      if(lsJaso[lsJaso.length-1]=='ㄹ') { // ㄷ 불규칙
-        results.add(
-            new String[]{stem.substring(0,stem.length()-1)+
-                MorphUtil.makeChar(stem.charAt(stem.length()-1),7)
-                ,ending
-                ,String.valueOf(PatternConstants.IRR_TYPE_DI)});
-      } else if(lsJaso.length==2) { // ㅅ 불규칙
-        results.add(
-            new String[]{stem.substring(0,stem.length()-1)+
-                MorphUtil.makeChar(stem.charAt(stem.length()-1),19)
-                ,ending
-                ,String.valueOf(PatternConstants.IRR_TYPE_SI)});        
-      }      
-    }
-    
-    if((fechJaso[0]=='ㄴ'||fechJaso[0]=='ㄹ'||fechJaso[0]=='ㅁ'||  feCh=='오'||feCh=='시')
-        &&(ls=='우')) { // ㅂ 불규칙
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),17)
-              ,ending
-              ,String.valueOf(PatternConstants.IRR_TYPE_BI)});        
-    }
-    
-    if((fechJaso[0]=='ㄴ'||fechJaso[0]=='ㅂ'||fechJaso[0]=='ㅅ'||  feCh=='오')
-        &&(lsJaso.length==2)) { // ㄹ 탈락
-
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),8)
-              ,ending
-              ,String.valueOf(PatternConstants.IRR_TYPE_LI)});      
-    }
-    
-    if(lsJaso.length==2
-        &&(fechJaso[0]=='ㄴ'||fechJaso[0]=='ㄹ'||fechJaso[0]=='ㅁ'||fechJaso[0]=='ㅂ'||
-        lsJaso[1]=='ㅏ'||lsJaso[1]=='ㅓ'||lsJaso[1]=='ㅑ'||lsJaso[1]=='ㅕ')
-        &&!"나".equals(stem)) { // ㅎ 불규칙, 그러나 [낳다]는 ㅎ 불규칙이 아니다.
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),27)
-              ,ending
-              ,String.valueOf(PatternConstants.IRR_TYPE_HI)});      
-    }    
-  }
-  
-  /**
-   * 어미만 변하는 경우
-   * @param results results
-   * @param stem  stem
-   * @param ending  ending
-   */
-  private static void irregularEnding(List<String[]> results, String stem, String ending) {
-    if(ending.startsWith("ㅆ")) return;
-    
-    char feCh = ending.charAt(0);
-    char ls = stem.charAt(stem.length()-1);
-
-    if(feCh=='러'&&ls=='르') { // '러' 불규칙
-      results.add(
-          new String[]{stem
-              ,"어"+ending.substring(1)
-              ,String.valueOf(PatternConstants.IRR_TYPE_RO)});        
-    } else if("라".equals(ending)&&"가거".equals(stem)) { // '거라' 불규칙
-      results.add( 
-          new String[]{stem.substring(0,stem.length()-1)
-              ,"어라"
-              ,String.valueOf(PatternConstants.IRR_TYPE_GU)});              
-    } else if("라".equals(ending)&&"오너".equals(stem)) { // '너라' 불규칙
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)
-              ,"어라"
-              ,String.valueOf(PatternConstants.IRR_TYPE_NU)});      
-    }
-    
-    if("여".equals(ending)&&ls=='하') { // '여' 불규칙
-      results.add(
-          new String[]{stem
-              ,"어"
-              ,String.valueOf(PatternConstants.IRR_TYPE_NU)});        
-    }
-  }
-  
-  /**
-   * 어간과 어미가 모두 변하는 경우
-   * @param results results
-   * @param stem  stem
-   * @param ending  ending
-   */
-  private static void irregularAO(List<String[]> results, String stem, String ending) {
-    
-    char ls = stem.charAt(stem.length()-1);
-    char[] lsJaso = MorphUtil.decompose(ls);
-    
-    if(lsJaso.length<2) return;
-    
-    if(lsJaso[1]=='ㅘ') {
-      if(stem.endsWith("도와")||stem.endsWith("고와")) { // '곱다', '돕다'의 'ㅂ' 불규칙
-        results.add(
-            new String[]{stem.substring(0,stem.length()-2)+
-                MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ㅂ'
-                ,makeTesnseEomi("아",ending)
-                ,String.valueOf(PatternConstants.IRR_TYPE_BI)});          
-      }else { // '와' 축약
-        results.add(
-            new String[]{stem.substring(0,stem.length()-1)+
-                MorphUtil.makeChar(stem.charAt(stem.length()-1),8,0) // 자음 + ㅗ 
-                ,makeTesnseEomi("아",ending)
-                ,String.valueOf(PatternConstants.IRR_TYPE_WA)});        
-      }
-    } else if(stem.endsWith("퍼")) {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0) // 자음 + - 
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_WA)});  
-    } else if(lsJaso[1]=='ㅝ') {
-      if(stem.length()>=2) // 'ㅂ' 불규칙
-        results.add(
-            new String[]{stem.substring(0,stem.length()-2)+
-                MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ㅂ'
-                ,makeTesnseEomi("어",ending)
-                ,String.valueOf(PatternConstants.IRR_TYPE_BI)});  
-
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),13,0) // 자음 + ㅗ 
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_WA)});  
-    } else if(stem.length()>=2&&ls=='라') {
-      char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
-      if(ns.length==3&&ns[2]=='ㄹ') { // 르 불규칙
-        results.add(
-            new String[]{stem.substring(0,stem.length()-2)+
-                MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
-                ,makeTesnseEomi("아",ending)
-                ,String.valueOf(PatternConstants.IRR_TYPE_RO)});          
-      }      
-    } else if(stem.length()>=2&&ls=='러') {
-      char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
-      if(stem.charAt(stem.length()-2)=='르') { // 러 불규칙
-        results.add(
-            new String[]{stem.substring(0,stem.length()-1)
-                ,makeTesnseEomi("어",ending)
-                ,String.valueOf(PatternConstants.IRR_TYPE_LO)});  
-      } else if(ns.length==3&&ns[2]=='ㄹ') { // 르 불규칙
-        results.add(
-            new String[]{stem.substring(0,stem.length()-2)+
-                MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
-                ,makeTesnseEomi("어",ending)
-                ,String.valueOf(PatternConstants.IRR_TYPE_RO)});  
-      }
-    } else if(stem.endsWith("펴")||stem.endsWith("켜")) {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_EI)});  
-    } else if(stem.endsWith("해")) {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),0,0)
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_EI)});        
-    } else if(lsJaso.length==2&&lsJaso[1]=='ㅏ') {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_UO)});  
-    } else if(lsJaso.length==2&&lsJaso[1]=='ㅓ') {
-      // 으 탈락
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_UO)});  
-      //   아 불규칙
-      results.add(
-          new String[]{stem
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_AH)});  
-    } else if(lsJaso[1]=='ㅕ') {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_EI)});  
-    } else if(lsJaso[1]=='ㅙ') {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),11,0)
-              ,makeTesnseEomi("어",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_OE)});  
-    } else if(lsJaso[1]=='ㅐ') {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),0,27)
-              ,makeTesnseEomi("아",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
-    } else if(lsJaso[1]=='ㅒ') {
-      results.add(
-          new String[]{stem.substring(0,stem.length()-1)+
-              MorphUtil.makeChar(stem.charAt(stem.length()-1),2,27)
-              ,makeTesnseEomi("아",ending)
-              ,String.valueOf(PatternConstants.IRR_TYPE_HI)});              
-    }
-  }
-  
-  /**
-   * 시제 선어미말을 만들어서 반환한다.
-   * @param preword  '아' 또는 '어'
-   * @param endword  어미[선어미말을 포함]
-   * return '았' 또는 '었'을 만들어서 반환한다.
-   */
-  public static String makeTesnseEomi(String preword, String endword) {
-
-    if(preword==null||preword.length()==0) return endword;
-    if(endword==null||endword.length()==0) return preword;
-
-    if(endword.charAt(0)=='ㅆ') {
-      return preword.substring(0,preword.length()-1)+
-          MorphUtil.makeChar(preword.charAt(preword.length()-1),20)+endword.substring(1,endword.length());    
-    } else if(endword.charAt(0)=='ㄴ') {
-      return preword.substring(0,preword.length()-1)+
-          MorphUtil.makeChar(preword.charAt(preword.length()-1),4)+endword.substring(1,endword.length());
-    } else if(endword.charAt(0)=='ㄹ') {
-      return preword.substring(0,preword.length()-1)+
-          MorphUtil.makeChar(preword.charAt(preword.length()-1),8)+endword.substring(1,endword.length());  
-    } else if(endword.charAt(0)=='ㅁ') {
-      return preword.substring(0,preword.length()-1)+
-          MorphUtil.makeChar(preword.charAt(preword.length()-1),16)+endword.substring(1,endword.length());          
-    } else if(endword.charAt(0)=='ㅂ') {
-      return preword.substring(0,preword.length()-1)+
-          MorphUtil.makeChar(preword.charAt(preword.length()-1),17)+endword.substring(1,endword.length());
-    }
-    return preword+endword;    
-  }
- 
-   /**
-    * '음/기' + '이' + 어미, '에서/부터/에서부터' + '이' + 어미 인지 조사한다.
-    */
-   public static boolean endsWithEEomi(String stem) {
-     int len = stem.length();
-     if(len<2||!stem.endsWith("이")) return false;
-    
-     char[] jasos = MorphUtil.decompose(stem.charAt(len-2));
-     if(jasos.length==3&&jasos[2]=='ㅁ')
-       return true;
-     else {
-       int index = stem.lastIndexOf("기");
-       if(index==-1) index = stem.lastIndexOf("에서");
-       if(index==-1) index = stem.lastIndexOf("부터");
-       if(index==-1) return false;
-       return true;
-     }
-   }
    
   private static void setPomiResult(String[] results,String stem, String pomi ) {
     results[0] = stem;

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java Fri Oct 18 17:20:26 2013
@@ -19,10 +19,8 @@ package org.apache.lucene.analysis.ko.ut
 
 import java.util.List;
 
-import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
 import org.apache.lucene.analysis.ko.morph.AnalysisOutput;
 import org.apache.lucene.analysis.ko.morph.PatternConstants;
-import org.apache.lucene.analysis.ko.morph.WordEntry;
 
 public class MorphUtil {
   private MorphUtil() {}
@@ -136,58 +134,6 @@ public class MorphUtil {
     }
     
   }
-  
-  /**
-   * 용언 + '음/기' + '이' + 어미, 체언 + '에서/부터/에서부터' + '이' + 어미
-   * @param output  the output text
-   * @param candidates  the candidates
-   */
-  public static void buildPtnCM(AnalysisOutput output, List<AnalysisOutput> candidates) {
-    
-    char ch = output.getStem().charAt(output.getStem().length()-2);
-    char[] jasos = MorphUtil.decompose(ch);
-    if(jasos.length==3||ch=='기') {
-      buildPtnVMCM(output,candidates);      
-    } else {
-      
-    }
-  }
-  
-  private static void buildPtnVMCM(AnalysisOutput output, List<AnalysisOutput> candidates) {
-    String stem = output.getStem();
-  
-    output.setPatn(PatternConstants.PTN_VMCM);
-    output.setPos(PatternConstants.POS_VERB);
-    
-    char ch = stem.charAt(stem.length()-2);
-    char[] jasos = MorphUtil.decompose(ch);
-
-    if(ch=='기') {
-      output.addElist("기");
-      output.addElist("이");
-      output.setStem(stem.substring(0,stem.length()-2));
-      
-      if(DictionaryUtil.getVerb(output.getStem())!=null)
-        candidates.add(output);
-    }else if(jasos[2]=='ㅁ') {
-      if(stem.length()>1) stem = stem.substring(0,stem.length()-2);
-      stem += MorphUtil.makeChar(ch, 0);
-      output.addElist("ㅁ");
-      output.addElist("이");
-      output.setStem(stem);
-
-      if(DictionaryUtil.getVerb(stem)!=null) 
-        candidates.add(output);
-      else {
-        String[] morphs = IrregularUtil.restoreIrregularVerb(stem,"ㅁ");
-        if(morphs!=null) {
-          output.setScore(AnalysisOutput.SCORE_CORRECT);
-          output.setStem(morphs[0]);
-          candidates.add(output);
-        }
-      }
-    }
-  }
 
   public static boolean hasVerbOnly(String input) {
     
@@ -197,43 +143,4 @@ public class MorphUtil {
     }
     return false;
   }
-  
-  /**
-   * 시제 선어미말을 만들어서 반환한다.
-   * @param preword  '아' 또는 '어'
-   * @param endword  어미[선어미말을 포함]
-   * @return '았' 또는 '었'을 만들어서 반환한다.
-   */
-  public static String makeTesnseEomi(String preword, String endword) {
-
-    if(preword==null||preword.length()==0) return endword;
-    if(endword==null||endword.length()==0) return preword;
-
-    if(endword.charAt(0)=='ㅆ') {
-      return preword.substring(0,preword.length()-1)+
-          makeChar(preword.charAt(preword.length()-1),20)+endword.substring(1,endword.length());    
-    } else if(endword.charAt(0)=='ㄴ') {
-      return preword.substring(0,preword.length()-1)+
-          makeChar(preword.charAt(preword.length()-1),4)+endword.substring(1,endword.length());
-    } else if(endword.charAt(0)=='ㄹ') {
-      return preword.substring(0,preword.length()-1)+
-          makeChar(preword.charAt(preword.length()-1),8)+endword.substring(1,endword.length());  
-    } else if(endword.charAt(0)=='ㅁ') {
-      return preword.substring(0,preword.length()-1)+
-          makeChar(preword.charAt(preword.length()-1),16)+endword.substring(1,endword.length());          
-    } else if(endword.charAt(0)=='ㅂ') {
-      return preword.substring(0,preword.length()-1)+
-          makeChar(preword.charAt(preword.length()-1),17)+endword.substring(1,endword.length());
-    }
-    return preword+endword;    
-  }
-  
-  /**
-   * 용언화접미사가 결합될 수 있는지 여부를 점검한다.
-   * 특히 사전에 등록된 되다, 하다형 의 접속이 가능한지를 조사한다.
-   */
-  public static boolean isValidSuffix(WordEntry entry, AnalysisOutput o) {
-    
-    return true;
-  }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java Fri Oct 18 17:20:26 2013
@@ -216,121 +216,6 @@ public class NounUtil {
     return true;
   }
     
-     
-  /**
-   * 복합명사인지 조사하고, 복합명사이면 단위명사들을 찾는다.
-   * 복합명사인지 여부는 단위명사가 모두 사전에 있는지 여부로 판단한다.
-   * 단위명사는 2글자 이상 단어에서만 찾는다.
-   * @param o
-   * @throws MorphException
-   */     
-//     public static boolean confirmCNoun(AnalysisOutput o)  {
-//
-//       if(o.getStem().length()<3) return false;
-//       if(o.getPatn()==PatternConstants.PTN_N
-//           &&DictionaryUtil.existJosa(o.getStem().substring(o.getStem().length()-2))) return false;
-//       
-//      List<CompoundEntry> results = new ArrayList();
-//      List<List> queue = new ArrayList();
-//      String prefix = o.getStem().substring(0,1);
-//      
-//      int pos = 0;
-//      boolean moreTwo =  false;
-//      while(pos<o.getStem().length()) {
-//
-//        List<WordEntry> nList = findNouns(o.getStem().substring(pos),queue.size(),o);
-//        if(nList==null) return false;
-//
-//        if(pos==0&&DictionaryUtil.existPrefix(prefix)) nList.add(new WordEntry(prefix));
-//
-//        if(nList.size()==0) {
-//          if(queue.size()==0) return false;
-//          List<WordEntry> tmpList = queue.get(queue.size()-1);
-//
-//          tmpList.remove(tmpList.size()-1);  
-//          pos -= results.get(queue.size()-1).getWord().length();        
-//          if(tmpList.size()==0) {        
-//            while(tmpList.size()==0) {        
-//              results.remove(queue.size()-1);                  
-//              queue.remove(tmpList);
-//              if(queue.size()==0) return false;
-//              
-//              tmpList = queue.get(queue.size()-1);              
-//              tmpList.remove(tmpList.size()-1);
-//              if(tmpList.size()==0) continue;
-//              
-//              pos -= results.get(queue.size()-1).getWord().length();          
-//              results.set(queue.size()-1, new CompoundEntry(tmpList.get(tmpList.size()-1).getWord(),pos));  
-//              pos += tmpList.get(tmpList.size()-1).getWord().length();            
-//                        
-//            }          
-//          }else {        
-//            results.set(queue.size()-1, new CompoundEntry(tmpList.get(tmpList.size()-1).getWord(),pos));
-//            pos += tmpList.get(tmpList.size()-1).getWord().length();
-//          }    
-//
-//        } else {
-//          queue.add(nList);
-//          WordEntry noun = nList.get(nList.size()-1);
-//          results.add(new CompoundEntry(noun.getWord(),pos));
-//          pos += noun.getWord().length();
-//          if(noun.getCompounds().size()>0) o.addCNoun(noun.getCompounds());
-//          if(noun.getWord().length()>1) moreTwo=true;
-//        }
-//      }
-//
-//      if(results.size()>1&&DNouns.contains(results.get(results.size()-1).getWord())) {
-//        CompoundEntry dnoun = results.remove(results.size()-1);
-//              o.setStem(o.getStem().substring(0,o.getStem().length()-dnoun.getWord().length()));
-//              o.setNsfx(dnoun.getWord());      
-//      }
-//      
-//      if(results.size()>1) o.addCNoun(results);  
-//      
-//      o.setScore(AnalysisOutput.SCORE_CORRECT);
-//      return true;
-//    }
-    
-  /**
-   * 복합명사에서 단위명사를 분리해낸다.
-   * 리스트의 가장 마지막에 위치한 단어가 최장단어이다.
-   * @param str  복합명사
-   * @param pos the analysing start point
-   * @param o    분석결과
-   * return    단위명사 리스트
-   */
-  private static List<WordEntry> findNouns(String str, int pos, AnalysisOutput o) {
-
-    List<WordEntry> nList = new ArrayList<WordEntry>();
-
-    if(str.length()==2&&DictionaryUtil.existSuffix(str.substring(0,1))&&DNouns.contains(str.substring(1))) {
-      o.setStem(o.getStem().substring(0,o.getStem().length()-1));
-      o.setNsfx(str.substring(1));
-      nList.add(new WordEntry(str.substring(0,1)));
-      return nList;
-    }else if(str.length()==2&&DictionaryUtil.existSuffix(str.substring(0,1))&&DictionaryUtil.existJosa(str.substring(1))) {
-      return null;
-    }
-      
-    if(pos>=2&&DictionaryUtil.existJosa(str)) return null;
-      
-    if(str.length()==1&&(DictionaryUtil.existSuffix(str)||DNouns.contains(str))) {
-      nList.add(new WordEntry(str));
-      return nList;
-    }
-
-    for(int i=1;i<str.length();i++) {    
-      String sub = str.substring(0,i+1);    
-      if(!DictionaryUtil.findWithPrefix(sub).hasNext()) break;
-      WordEntry entry = DictionaryUtil.getAllNoun(sub);  
-      if(entry!=null) {          
-        nList.add(entry);
-      }
-    }
-
-    return nList;      
-  }
-    
   /*
      * 마지막 음절이 명사형 접미사(등,상..)인지 조사한다.
      */
@@ -355,14 +240,6 @@ public class NounUtil {
           
     return true;
   }
-    
-//      public static int endsWithDNoun(String stem)   {
-//          for(int i = 0; i < DNouns.length; i++)
-//              if(stem.endsWith(DNouns[i]))
-//                  return stem.lastIndexOf(DNouns[i]);
-//
-//          return -1;
-//      }
       
   public static boolean endsWith2Josa(String input) {
 

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java Fri Oct 18 17:20:26 2013
@@ -113,8 +113,4 @@ public class SyllableUtil {
     return getFeature(idx);
     
   }
-  
-  public static boolean isAlpanumeric(char ch) {
-    return (ch>='0'&&ch<='z');
-  }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java Fri Oct 18 17:20:26 2013
@@ -81,16 +81,6 @@ public class VerbUtil {
      
   }
    
-  public static boolean constraintVerb(String start, String end) {
-     
-    char[] schs = MorphUtil.decompose(start.charAt(start.length()-1));
-    char[] echs = MorphUtil.decompose(end.charAt(0));
-     
-    if(schs.length==3&&schs[2]=='ㄹ'&&echs[0]=='ㄹ') return false;
-     
-    return true;
-  }
-   
   /**
    * 3. 학교에서이다 : 체언 + '에서/부터/에서부터' + '이' + 어미 (PTN_NJCM) <br>
    */