You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/19 23:16:30 UTC

svn commit: r1533835 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko: dic/ morph/ utils/

Author: rmuir
Date: Sat Oct 19 21:16:29 2013
New Revision: 1533835

URL: http://svn.apache.org/r1533835
Log:
LUCENE-4956: more cleanups and visibility fixes

Modified:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sat Oct 19 21:16:29 2013
@@ -167,14 +167,6 @@ public class DictionaryUtil {
     return null;
   }
   
-  public static WordEntry getAdverb(String key) {
-    WordEntry entry = getWord(key);
-    if(entry==null) return null;
-
-    if(entry.getFeature(WordEntry.IDX_BUSA)=='1') return entry;
-    return null;
-  }
-  
   public static WordEntry getBusa(String key) {
     WordEntry entry = getWord(key);
     if(entry==null) return null;
@@ -183,31 +175,6 @@ public class DictionaryUtil {
     return null;
   }
   
-  public static WordEntry getIrrVerb(String key, char irrType) {
-    WordEntry entry = getWord(key);
-    if(entry==null) return null;
-
-    if(entry.getFeature(WordEntry.IDX_VERB)=='1'&&
-        entry.getFeature(WordEntry.IDX_REGURA)==irrType) return entry;
-    return null;
-  }
-  
-  public static WordEntry getBeVerb(String key) {
-    WordEntry entry = getWord(key);
-    if(entry==null) return null;
-    
-    if(entry.getFeature(WordEntry.IDX_BEV)=='1') return entry;
-    return null;
-  }
-  
-  public static WordEntry getDoVerb(String key) {
-    WordEntry entry = getWord(key);
-    if(entry==null) return null;
-    
-    if(entry.getFeature(WordEntry.IDX_DOV)=='1') return entry;
-    return null;
-  }
-  
   public static WordEntry getUncompound(String key) {
     return uncompounds.get(key);
   }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java Sat Oct 19 21:16:29 2013
@@ -20,8 +20,6 @@ package org.apache.lucene.analysis.ko.mo
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.lucene.analysis.ko.utils.MorphUtil;
-
 public class AnalysisOutput implements Cloneable {
 
   public static final int SCORE_CORRECT = 100;
@@ -33,29 +31,20 @@ public class AnalysisOutput implements C
   private String source; //분석하기 전 문자열(띄워쓰기 모듈에서 사용된다.)
   private int score; // score of this result
   private int patn; // word pattern
-  private char type; // type of input word
   private List<CompoundEntry> compound = new ArrayList<CompoundEntry>(); // compound noun of input word
   private String stem;
   private char pos; // 3 simplified stem type
-  private char pos2; // pos attr. for 'pos'
-  private char dinf; // pos inf in Han-dic
   private String nsfx; // index of noun suffix
   private String josa; // josa string
-  private List<String> jlist = new ArrayList<String>(); // unit-josa sequence
   private String eomi;  // Eomi string
   private List<String> elist = new ArrayList<String>(); // unit-Eomi sequence
   private String pomi; // prefinal Eomi
   private String xverb; // Xverb string
   private String vsfx; // verb suffix
-  private char vtype; // irregular type
   
   private int maxWordLen = 0; // the max length of words within compound nouns
   private int dicWordLen = 0; // the sum of the length of words within compound nouns
   
-  public AnalysisOutput() {
-    this.score = SCORE_FAIL;
-  }
-  
   public AnalysisOutput(String stem, String josa, String eomi, int patn) {
     this.score = SCORE_ANALYSIS;    
     this.stem=stem;
@@ -80,9 +69,6 @@ public class AnalysisOutput implements C
   public void setPatn(int i) {
     this.patn = i;
   }
-  public void setType(char c) {
-    this.type = c;
-  }
   
   public void setStem(String s) {
     this.stem = s;
@@ -93,14 +79,6 @@ public class AnalysisOutput implements C
     this.pos = c;
   }
   
-  public void setPos2(char c){
-    this.pos2 = c;
-  }
-  
-  public void setDinf(char c){
-    this.dinf = c;
-  }
-  
   public void setNsfx(String s) {
     this.nsfx = s;    
   }
@@ -109,10 +87,6 @@ public class AnalysisOutput implements C
     this.josa = s;
   }
   
-  public void addJlist(String l) {
-    this.jlist.add(l);
-  }
-  
   public void setEomi(String s){
     this.eomi = s;
   }
@@ -134,9 +108,6 @@ public class AnalysisOutput implements C
   public void setVsfx(String s) {
     this.vsfx = s;
   }
-  public void setVtype(char c) {
-    this.vtype = c;
-  }
 
   public int getScore() {
     return this.score;
@@ -144,31 +115,19 @@ public class AnalysisOutput implements C
   public int getPatn() {
     return this.patn;
   }
-  
-  public char getType() {
-    return this.type;
-  }  
+
   public String getStem() {
     return stem;
   }  
   public char getPos() {
     return this.pos;
   }
-  public char getPos2() {
-    return this.pos2;
-  }
-  public char getDinf() {
-    return this.dinf;
-  }
   public String getNsfx() {
     return this.nsfx;
   }
   public String getJosa() {
     return this.josa;
   }
-  public List<String> getJlist() {
-    return this.jlist;
-  }
   public String getEomi() {
     return this.eomi;
   }
@@ -184,9 +143,6 @@ public class AnalysisOutput implements C
   public String getVsfx() {
     return this.vsfx;
   }
-  public char getVtype() {
-    return this.vtype;
-  }
   
   public int getMaxWordLen() {
     return maxWordLen;
@@ -235,98 +191,10 @@ public class AnalysisOutput implements C
   }
   
   public AnalysisOutput clone() {
-    final AnalysisOutput output;
     try {
-      output = (AnalysisOutput)super.clone();
+      return (AnalysisOutput)super.clone();
     } catch (CloneNotSupportedException cnse) {
       throw new AssertionError();
     }
-    output.setDinf(this.dinf);
-    output.setEomi(this.eomi);
-    output.setJosa(this.josa);
-    output.setNsfx(this.nsfx);
-    output.setPatn(this.patn);
-    output.setPomi(this.pomi);
-    output.setPos(this.pos);
-    output.setPos2(this.pos2);
-    output.setScore(this.score);
-    output.setStem(this.stem);
-    output.setType(this.type);
-    output.setVsfx(this.vsfx);
-    output.setVtype(this.vtype);
-    output.setXverb(this.xverb);
-    
-    return output;
-  }
-  
-  public String toString() {
-    StringBuffer buff = new StringBuffer();
-    
-    buff.append(MorphUtil.buildTypeString(getStem(),getPos()));
-    if(getNsfx()!=null)
-      buff.append(",").append(MorphUtil.buildTypeString(getNsfx(),PatternConstants.POS_SFX_N));
-    
-    if(getPatn()==PatternConstants.PTN_NJ || getPatn()==PatternConstants.PTN_ADVJ) {
-      buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
-    }else if(getPatn()==PatternConstants.PTN_NSM) {
-      buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
-      buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));      
-    }else if(getPatn()==PatternConstants.PTN_NSMJ) {
-      buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
-      buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
-    }else if(getPatn()==PatternConstants.PTN_NSMXM) {
-      buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_COPULA));
-      buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));    
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
-      buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
-    }else if(getPatn()==PatternConstants.PTN_NJCM) {
-      buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_SFX_V));
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));  
-    }else if(getPatn()==PatternConstants.PTN_NSMXMJ) {
-      buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));      
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_COPULA));      
-      buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));  
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));  
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));        
-    }else if(getPatn()==PatternConstants.PTN_VM) {
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));        
-    }else if(getPatn()==PatternConstants.PTN_VMJ) {
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));        
-    }else if(getPatn()==PatternConstants.PTN_VMCM) {
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_SFX_N));      
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));        
-    }else if(getPatn()==PatternConstants.PTN_VMXM) {
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_COPULA));      
-      buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));        
-    }else if(getPatn()==PatternConstants.PTN_VMXMJ) {
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_COPULA));      
-      buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));  
-      if(getPomi()!=null) 
-        buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));  
-      buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));      
-      buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));                
-    }
-    return buff.toString();
   }
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java Sat Oct 19 21:16:29 2013
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ko.mo
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
 
@@ -29,13 +28,7 @@ import org.apache.lucene.analysis.ko.dic
  */
 public class CompoundNounAnalyzer {
   
-  private static int score = 1;  
-  
   private boolean exactMach  = true;
-  
-  private static Pattern NUM_PATTERN = Pattern.compile("^[0-9\\.,]+$");
-
-  private static Pattern ALPHANUM_PATTERN = Pattern.compile("^[0-9A-Za-z\\.,]+$");
     
   public boolean isExactMach() {
     return exactMach;

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java Sat Oct 19 21:16:29 2013
@@ -305,13 +305,12 @@ public class MorphAnalyzer {
    * @param candidates  candidates
    */
   public void analysisWithJosa(String stem, String end, List<AnalysisOutput> candidates) {
-  
     if(stem==null||stem.length()==0) return;  
     
     char[] chrs = MorphUtil.decompose(stem.charAt(stem.length()-1));
     if(!DictionaryUtil.existJosa(end)||
-        (chrs.length==3&&ConstraintUtil.isTwoJosa(end))||
-        (chrs.length==2&&(ConstraintUtil.isThreeJosa(end))||"".equals(end))) return; // 연결이 가능한 조사가 아니면...
+        (chrs.length==3 && end.length() == 1 && ConstraintUtil.isTwoJosa(end.charAt(0))) ||
+        (chrs.length==2 && (end.length() == 1 && ConstraintUtil.isThreeJosa(end.charAt(0)))||"".equals(end))) return; // 연결이 가능한 조사가 아니면...
 
     AnalysisOutput output = new AnalysisOutput(stem, end, null, PatternConstants.PTN_NJ);
     output.setPos(PatternConstants.POS_NOUN);

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java Sat Oct 19 21:16:29 2013
@@ -42,17 +42,6 @@ public interface PatternConstants {
 
   public static int PTN_ZZZ =  35;  //* 문장부호, KS 완성형 기호열, 단독조사/어미 */  
   
-
-  /**
-   * Definition of sentence types and parts of speech
-   */
-  
-  //*         CLASSIFICATION OF SENTENCE PATTERNS              */
-  public static char SPTN_DECL = 'D';       //* declarative sentence */
-  public static char SPTN_QUES =  'Q';      //* question sentence    */
-  public static char SPTN_IMPR =  'I';       //* imperative sentence  */
-  public static char SPTN_TITL =  'T';       //* title of a paragraph */
-
   //*          CLASSIFICATION OF PARTS OF SPEECH               */
   //  3(basic) + 2(special) types of stem for 'pos'
   public static char POS_NPXM  =   'N';       //* noun, pnoun, xn, nume */
@@ -103,191 +92,4 @@ public interface PatternConstants {
 
   public static char POS_ETC   =   'Z';       //* not decided yet       */
 
-  /* ASCII stem may be classified as follows: NOT USED YET    */
-  public static char POS_ALPHA  =  'A';       //* English alphabet      */
-  public static char POS_NUMBER =  '#';       //* Arabic numbers        */
-  public static char POS_SMARK  =  'R';       //* sentence markers      */
-
-  public static char POS_NVERBK  = 'Y';       //* guessed as noun+verb  */
-
-  public static char POS_SQUOTE  = 's';       //* single quotation      */
-  public static char POS_DQUOTE  = 'd';       //* double quotation      */
-  public static char POS_LPAREN  = 'l';       //* left parenthesis      */
-  public static char POS_RPAREN  = 'r';       //* right parenthesis     */
-  
-  
-  /**----------------------  불규칙 변형 유형  ------------------------ */  
-  public static char IRR_TYPE_DI = 'd';  //* ㄷ 불규칙
-  public static char IRR_TYPE_BI = 'b'; //* ㅂ 불규칙
-  public static char IRR_TYPE_SI = 's'; //* ㅅ 불규칙
-  public static char IRR_TYPE_HI = 'h'; //* ㅎ 불규칙
-  public static char IRR_TYPE_RO = 'r'; //* 러 불규칙
-  public static char IRR_TYPE_LO = 'l'; //* 르 불규칙
-  public static char IRR_TYPE_OU = 'o'; // * 우 불규칙
-  public static char IRR_TYPE_GU = 'g'; // *거라 불규칙
-  public static char IRR_TYPE_NU = 'n'; // * 너라 불규칙
-  public static char IRR_TYPE_YO = 'y'; // * 여 불규칙
-  public static char IRR_TYPE_LI = 'L'; // * ㄹ 탈락
-  public static char IRR_TYPE_UO = 'u'; //으 탈락
-  public static char IRR_TYPE_AH = 'a'; // 아 탈락
-  public static char IRR_TYPE_AE = 'e'; // 애 축약
-  public static char IRR_TYPE_WA = 'w'; // 와 축약
-  public static char IRR_TYPE_EI = 'e'; // 이 축약
-  public static char IRR_TYPE_OE = 'O'; // 외 축약
-    
-  
-  /**-----------------------  조사의 변이체 유형  ------------------------
-   *
-   *JOSA_VAR_WiAb  -- '은/는', '이/가', '을/를', '와/과', '아/야' 구분 정보
-   *  '는/가/를/와/야'인 경우에 이 값이 set.
-   *JOSA_VAR_Wz_tal  -- '로/으로' 구분 정보 (예) '학교로' --> '학교'+'으로'
-   *  '으'가 탈락되어 복원된 경우에 이 값이 set.
-   *JOSA_VAR_Wi_tal  -- 조사 '고'와 '이고', '라고'와 '이라고' 구분 정보
-   *  '이'가 생략되어 복원한 경우에 이 값이 set.
-   *JOSA_VAR_Wg_tal  -- 조사 '에서'의 '에' 생략 정보
-   *  '에'가 생략되어 복원한 경우에 이 값이 set. '학교서' --> '학교'+'에서'
-   *
-   *JOSA_VAR_nameWi  -- 인명 뒤에 조음소 '이' 추가되는 경우
-   *  '이'를 분석결과에서 삭제한 경우. '승식'+'(이)가'
-   *  <참고> 이 경우는 항상 JOSA_VAR_WiAb 현상을 동반한다.
-   *JOSA_VAR_preWi  -- 서술격 조사 '이' 앞에 조사가 오는 경우
-   *  (예) '에서/부터/에서부터/대로' + '이' + '다'
-   *JOSA_VAR_preWi2  -- 서술격 조사 '이' 앞에 조사 & '에' 탈락된 경우
-   *  (예) '학교서다' --> '학교'+'(에)서'+'(이)'+'다'
-   *
-   *JOSA_VAR_Ag  -- '에게'의 변이체 '게'
-   *JOSA_VAR_Bg  -- '에게'의 변이체 '께'
-   *JOSA_VAR_hbDtg  -- '에게'의 변이체 '한테'
-   *
-   *$$$ 현재, '승식이한테'의 경우에 정보가 불충분한 점이 있음.
-   *-------------------------  조사의 변이체 유형  ------------------------*/
-
-  //  Values for 'jomi.josa'.
-  public static int JOSA_VAR_WiAb  =1;
-  public static int JOSA_VAR_Wz_tal  =2;
-  public static int JOSA_VAR_Wi_tal  =3;
-  public static int JOSA_VAR_Wg_tal  =4;
-
-  public static int JOSA_VAR_nameWi  =5;
-  public static int JOSA_VAR_preWi  =6;
-  public static int JOSA_VAR_preWi2  =7;
-
-  //Values for 'jomi.josaAgBg'.
-  public static int JOSA_VAR_Ag  =1;
-  public static int JOSA_VAR_Bg  =2;
-  public static int JOSA_VAR_hbDtg  =3;
-
-  /**---------------------  어말어미의 변이체 유형  ----------------------
-   *
-   *EOMI_VAR_Wb  -- '아'
-   *EOMI_VAR_Wf  -- '어'
-   *EOMI_VAR_Wj  -- '여' : '여/거/너/러/라'
-   *EOMI_VAR_Wb_tal  -- '아' 탈락
-   *EOMI_VAR_Wf_tal  -- '어' 탈락
-   *EOMI_VAR_b  -- 'ㅏ'
-   *EOMI_VAR_f  -- 'ㅓ'
-   *EOMI_VAR_j  -- 'ㅕ'
-   *EOMI_VAR_c  -- 'ㅐ'  ---> '해서', '까매서/하얘서' 등 ㅎ-불규칙
-   *EOMI_VAR_lc  -- 'ㅙ'  ---> '되다'에만 적용
-   *EOMI_VAR_If, Ib -- '러' 불규칙인 경우
-   *
-   *EOMI_VAR_Wz_tal  -- 종성 'ㄴ/ㄹ/ㅁ/ㅂ' 및 초성 'ㄹ/ㅁ'으로 시작되는 어미에서 '으' 탈락
-   *  <주의> '나/냐/느'로 시작되는 어미들은 '으' 탈락으로 간주하지 않음
-   *EOMI_VAR_Uz_tal  -- '습니다'에서 '스' 탈락
-   *
-   *EOMI_VAR_xv_Wf  -- 보조용언 앞에 오는 어미가 '아/어'
-   *EOMI_VAR_xv_Al  -- 보조용언 앞에 오는 어미가 '고'
-   *EOMI_VAR_xv_Ag  -- 보조용언 앞에 오는 어미가 '게'
-   *
-   *EOMI_VAR_Wi_tal  -- 무종성 용언 뒤에서 서술격 조사 '이' 생략
-   *
-   *$$$ '아/어'에 대한 변이체는 보조용언 앞에 오는 '아/어'에도 적용됨.
-   *
-   *-----------------------  어말어미의 변이체 유형  ----------------------*/
-
-  //Values for 'jomi.eomi' or 'jomi.xomi'.
-  public static int EOMI_VAR_Wb  =1;
-  public static int EOMI_VAR_Wf  =2;
-  public static int EOMI_VAR_Wj  =3;
-  public static int EOMI_VAR_Wb_tal  =4;
-  public static int EOMI_VAR_Wf_tal  =5;
-  public static int EOMI_VAR_b  =6;
-  public static int EOMI_VAR_f  =7;
-  public static int EOMI_VAR_j  =8;
-  public static int EOMI_VAR_c  =9;
-  public static int EOMI_VAR_lc  =10;
-  public static int EOMI_VAR_If  =11;
-  public static int EOMI_VAR_Ib  =12;
-
-  public static int EOMI_VAR_Wz_tal  =13;
-  public static int EOMI_VAR_Uz_tal  =14;
-
-  public static int EOMI_VAR_Wi_tal  =15;
-
-  //Values for 'jomi.xomitype'.
-  public static int EOMI_VAR_xv_Wf  =0; // 아/어
-  public static int EOMI_VAR_xv_Al  =1; // 고
-  public static int EOMI_VAR_xv_Ag  =2;  // 게
-  public static int EOMI_VAR_xv_Xi  =11; // 지
-
-  /**---------------------  선어말어미의 변이체 유형  ----------------------
-   *
-   *  선어말어미의 변이체 유형 --- 두 가지 정보를 표현
-   *
-   *    1. '시' 앞에 조음소 '으'가 오는 경우
-   *    2. '았/었'의 변이체 정보
-   *
-   *  POMI_VAR_WbV  -- '았'
-   *  POMI_VAR_WfV  -- '었'
-   *  POMI_VAR_WjV  -- '였'
-   *  POMI_VAR_V  -- 'ㅆ'
-   *  POMI_VAR_bV  -- 'ㅏㅆ'
-   *  POMI_VAR_fV  -- 'ㅓㅆ'
-   *  POMI_VAR_jV  -- 'ㅕㅆ'
-   *  POMI_VAR_cV  -- 'ㅐㅆ'  ---> '했다'에만 적용됨
-   *  POMI_VAR_lcV  -- 'ㅙㅆ'  ---> '됐다'에만 적용됨
-   *  POMI_VAR_IfV  -- '렀' ---> '러' 불규칙인 경우
-   *  POMI_VAR_WzUi  -- '으시' & '었'
-   *  POMI_VAR_WzUjV  -- '으시' & 'ㅕㅆ', 즉 '으셨'
-   *
-   *-----------------------  선어말어미의 변이체 유형  ----------------------*/
-
-  //  Values for 'jomi.pomi'.
-  public static int POMI_VAR_WbV  =1;
-  public static int POMI_VAR_WfV  =2;
-  public static int POMI_VAR_WjV  =3;
-  public static int POMI_VAR_V  =4;
-  public static int POMI_VAR_bV  =5;
-  public static int POMI_VAR_fV  =6;
-  public static int POMI_VAR_jV  =7;
-  public static int POMI_VAR_cV  =8;
-  public static int POMI_VAR_lcV  =9;
-  public static int POMI_VAR_IfV  =10;
-  public static int POMI_VAR_WzUi  =11;
-  public static int POMI_VAR_WzUjV  =12;
-
-  /**---------------------  조사/어미 외 기타 정보 ----------------------
-   *
-   *  RMA_RESULT --- the result is got from 'hangul.rma'
-   *  GUESS_ABBR --- verb stem is guessed as abbr. 'ki/kg/Zi/...'
-   *
-   *  GUESS_CNOUN -- stem is guessed as noun + noun + ...
-   *  GUESS_PNOUN  -- proper noun with Jongsong: articulative 'Wi' dropped.
-   *
-   *  GUESS_NPREF -- noun stem is guessed as prefix 'Gc/Ul' + noun
-   *  GUESS_VPREF -- verb stem is guessed as prefix 'WbD/QlU' + verb
-   *  GUESS_NVERB -- verb stem is guessed as noun + verb + ...
-   *
-   *-----------------------  조사/어미 외 기타 정보 ----------------------*/
-
-  //  Values for 'jomi.zzz'.
-  public static int RMA_RESULT  =1;
-  public static int GUESS_ABBR  =2;
-
-  public static int GUESS_CNOUN  =3;
-  public static int GUESS_PNOUN  =4;
-
-  public static int GUESS_NPREF  =5;
-  public static int GUESS_VPREF  =6;
-  public static int GUESS_NVERB  =7;
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java Sat Oct 19 21:16:29 2013
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ko.mo
 
 import java.util.Comparator;
 
-public class WSOuputComparator implements Comparator<AnalysisOutput> {
+class WSOuputComparator implements Comparator<AnalysisOutput> {
 
   public int compare(AnalysisOutput o1, AnalysisOutput o2) {
     

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java Sat Oct 19 21:16:29 2013
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ko.mo
 import java.util.ArrayList;
 import java.util.List;
 
-public class WSOutput  implements Cloneable {
+class WSOutput  implements Cloneable {
 
   private int lastStart = 0;
   

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java Sat Oct 19 21:16:29 2013
@@ -574,14 +574,4 @@ public class WordSpaceAnalyzer {
     return false;
     
   }
-  
-  private void printCandidate(WSOutput output) {
-    
-    List<AnalysisOutput> os = output.getPhrases();
-    for(AnalysisOutput o : os) {
-      System.out.print(o.toString()+"("+o.getScore()+")| ");
-    }
-    System.out.println("<==");
-    
-  } 
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java Sat Oct 19 21:16:29 2013
@@ -17,98 +17,46 @@ package org.apache.lucene.analysis.ko.ut
  * limitations under the License.
  */
 
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.lucene.analysis.ko.morph.PatternConstants;
-
 /**
  * 결합이 가능한 조건을 처리하는 클래스
  */
 public class ConstraintUtil {
   private ConstraintUtil() {}
-
-  private static Map<String, String> hahes = new HashMap<String, String>(); // "글로벌화해 ", "민족화해" 처럼 화해와 결합이 가능한 명사
-  static {
-    hahes.put("민족", "Y");hahes.put("동서", "Y");hahes.put("남북", "Y");
-  }
   
-  private static Map<String, String> eomiPnouns = new HashMap<String, String>(); 
-  static {
-    eomiPnouns.put("ㄴ", "Y");eomiPnouns.put("ㄹ", "Y");eomiPnouns.put("ㅁ", "Y");
-  }
-  
-  private static Map<Integer, Integer> PTN_MLIST= new HashMap<Integer, Integer>();
-  static {
-    PTN_MLIST.put(PatternConstants.PTN_NSM, PatternConstants.PTN_NSM);
-    PTN_MLIST.put(PatternConstants.PTN_NSMXM, PatternConstants.PTN_NSMXM);
-    PTN_MLIST.put(PatternConstants.PTN_NJCM, PatternConstants.PTN_NJCM);
-    PTN_MLIST.put(PatternConstants.PTN_VM, PatternConstants.PTN_VM);
-    PTN_MLIST.put(PatternConstants.PTN_VMCM, PatternConstants.PTN_VMCM);
-    PTN_MLIST.put(PatternConstants.PTN_VMXM, PatternConstants.PTN_VMXM);
-    PTN_MLIST.put(PatternConstants.PTN_NVM, PatternConstants.PTN_NVM);
-  }
-  
-  private static Map<Integer, Integer> PTN_JLIST= new HashMap<Integer, Integer>();
-  static {
-    PTN_JLIST.put(PatternConstants.PTN_NJ, PatternConstants.PTN_NJ);
-    PTN_JLIST.put(PatternConstants.PTN_NSMJ, PatternConstants.PTN_NSMJ);
-    PTN_JLIST.put(PatternConstants.PTN_VMJ, PatternConstants.PTN_VMJ);
-    PTN_JLIST.put(PatternConstants.PTN_VMXMJ, PatternConstants.PTN_VMXMJ);
-  }
-  
-  private static Map<String, String> WORD_GUKS= new HashMap<String, String>();
-  static {
-    WORD_GUKS.put("날것", "Y");
-    WORD_GUKS.put("들것", "Y");
-    WORD_GUKS.put("별것", "Y");
-    WORD_GUKS.put("찰것", "Y");
-    WORD_GUKS.put("탈것", "Y");
-    WORD_GUKS.put("하잘것", "Y");
+  public static boolean canHaheCompound(String key) {
+    return key.length() == 2 && ("민족".equals(key) || "동서".equals(key) || "남북".equals(key));
   }
   
   // 종성이 있는 음절과 연결될 수 없는 조사
-  private static Map<String, String> JOSA_TWO = new HashMap<String, String>();
-  static {
-    JOSA_TWO.put("가", "Y");
-    JOSA_TWO.put("는", "Y");
-    JOSA_TWO.put("다", "Y");
-    JOSA_TWO.put("나", "Y");
-    JOSA_TWO.put("니", "Y");
-    JOSA_TWO.put("고", "Y");
-    JOSA_TWO.put("라", "Y");
-    JOSA_TWO.put("와", "Y");
-    JOSA_TWO.put("랑", "Y");
-    JOSA_TWO.put("를", "Y");
-    JOSA_TWO.put("며", "Y");
-    JOSA_TWO.put("든", "Y");
-    JOSA_TWO.put("야", "Y");
-    JOSA_TWO.put("여", "Y");
+  public static boolean isTwoJosa(char josa) {
+    switch (josa) {
+      case '가':
+      case '는':
+      case '다':
+      case '나':
+      case '니':
+      case '고':
+      case '라':
+      case '와':
+      case '랑':
+      case '를':
+      case '며':
+      case '든':
+      case '야':
+      case '여': return true;
+      default: return false;
+    }
   }
   
   // 종성이 없는 음절과 연결될 수 없는 조사
-  private static Map<String, String> JOSA_THREE= new HashMap<String, String>();
-  static {
-    JOSA_THREE.put("과", "Y");
-    JOSA_THREE.put("은", "Y");
-    JOSA_THREE.put("아", "Y");
-    JOSA_THREE.put("으", "Y");
-    JOSA_THREE.put("은", "Y");
-    JOSA_THREE.put("을", "Y");
-  }
-  
-  public static boolean canHaheCompound(String key) {
-    if(hahes.get(key)!=null) return true;
-    return false;
-  }
-  
-  public static boolean isTwoJosa(String josa) {
-    
-    return (JOSA_TWO.get(josa)!=null);
-    
-  }
-  public static boolean isThreeJosa(String josa) {
-    
-    return (JOSA_THREE.get(josa)!=null);
-  }  
+  public static boolean isThreeJosa(char josa) {
+    switch (josa) {
+      case '과':
+      case '은':
+      case '아':
+      case '으':
+      case '을': return true;
+      default: return false;
+    }
+  } 
 }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java Sat Oct 19 21:16:29 2013
@@ -22,11 +22,11 @@ import org.apache.lucene.analysis.ko.dic
 public class EomiUtil {
   private EomiUtil() {}
 
-  public static final String RESULT_FAIL = "0";
+  static final String RESULT_FAIL = "0";
   
-  public static final String RESULT_SUCCESS = "1";
+  static final String RESULT_SUCCESS = "1";
   
-  public static final String[] verbSuffix = {
+  static final String[] verbSuffix = {
       "이","하","되","스럽","스러우","시키","있","없","같","당하","만하","드리","받","나","내"
   };
   
@@ -146,7 +146,7 @@ public class EomiUtil {
     results[1] = pomi;
   }  
   
-  public static boolean IsNLMBSyl(char ech, char lch) {
+  static boolean IsNLMBSyl(char ech, char lch) {
     switch(lch) {
       case 'ㄴ' : 
         return SyllableUtil.hasFeature(ech, SyllableUtil.YNPNA) || SyllableUtil.hasFeature(ech, SyllableUtil.YNPLN);

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java Sat Oct 19 21:16:29 2013
@@ -29,14 +29,18 @@ import org.apache.lucene.analysis.ko.mor
 public class NounUtil {
   private NounUtil() {}
 
-  private static final List<String> DNouns = new ArrayList<String>();
-    
-  static {
-    String[] strs = new String[]{"등", "들","상","간","뿐","별","적"};
-    for(String str:strs) {
-      DNouns.add(str);
+  private static boolean isDNoun(char ch) {
+    switch(ch) {
+      case '등':
+      case '들':
+      case '상':
+      case '간':
+      case '뿐':
+      case '별':
+      case '적': return true;
+      default: return false;
     }
-  };
+  }
     
   /**
    * 
@@ -52,7 +56,6 @@ public class NounUtil {
     if(strlen<2) return false;       
 
     char[] chrs = MorphUtil.decompose(o.getStem().charAt(strlen-1));
-    boolean success = false;
 
     if(o.getStem().charAt(strlen-1)!='기'&&!(chrs.length==3&&chrs[2]=='ㅁ')) return false;
 
@@ -223,7 +226,7 @@ public class NounUtil {
 
     int strlen = output.getStem().length();
     String d = output.getStem().substring(strlen-1);      
-    if(!DNouns.contains(d)) return false;
+    if(d.length() != 1 || !isDNoun(d.charAt(0))) return false;
 
     String s = output.getStem().substring(0, strlen-1);
     output.setNsfx(d);