You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cm...@apache.org on 2013/05/05 05:40:06 UTC
svn commit: r1479234 [3/15] - in /lucene/dev/branches/lucene4956: dev-tools/idea/.idea/ dev-tools/idea/lucene/analysis/arirang/ lucene/analysis/ lucene/analysis/arirang/ lucene/analysis/arirang/src/ lucene/analysis/arirang/src/java/ lucene/analysis/ari...

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordEntry.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordEntry.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordEntry.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordEntry.java Sun May  5 03:39:51 2013
@@ -0,0 +1,95 @@
+package org.apache.lucene.analysis.kr.morph;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A dictionary entry: the word itself, its feature flags (one char per
+ * IDX_* slot) and, for compound words, the list of component entries.
+ */
+public class WordEntry {
+
+	// Positions of the individual flags inside the features array.
+	public static final int IDX_NOUN = 0;
+	public static final int IDX_VERB = 1;
+	public static final int IDX_BUSA = 2;
+	public static final int IDX_DOV = 3;
+	public static final int IDX_BEV = 4;
+	public static final int IDX_NE = 5;
+	public static final int IDX_ADJ = 6; // adjective
+	public static final int IDX_NPR = 7;  // noun classification (M: Measure)
+	public static final int IDX_CNOUNX = 8; 
+	public static final int IDX_REGURA = 9;
+	
+	/**
+	 * The word.
+	 */
+	private String word;
+	
+	/**
+	 * Feature flags of the word, indexed by the IDX_* constants.
+	 */
+	private char[] features;
+	
+	/** Component entries; empty unless set through a constructor or setter. */
+	private List<CompoundEntry> compounds = new ArrayList<CompoundEntry>();
+	
+	/** Creates an entry with no word and no features. */
+	public WordEntry() {
+		
+	}
+	
+	/** Creates an entry for {@code word} without features. */
+	public WordEntry(String word) {
+		this.word = word;
+	}
+	
+	/** Creates an entry for {@code word} with the given feature flags (the array is not copied). */
+	public WordEntry(String word, char[] cs) {
+		this.word = word;
+		this.features = cs;
+	}
+	
+	/**
+	 * Creates an entry for {@code word} with the given compound components.
+	 * The parameter stays a raw List so that existing callers keep compiling.
+	 */
+	@SuppressWarnings("unchecked")
+	public WordEntry(String word, List c) {
+		this.word = word;
+		this.compounds = c;
+	}
+	
+	public void setWord(String w) {
+		this.word = w;
+	}
+	
+	public String getWord() {
+		return this.word;
+	}
+	
+	public void setFeatures(char[] cs) {
+		this.features = cs;
+	}
+	
+	/**
+	 * Returns the feature flag stored at {@code index}.
+	 *
+	 * @param index one of the IDX_* constants
+	 * @return the flag, or '0' when no features are set or the index lies
+	 *         outside the feature array
+	 */
+	public char getFeature(int index) {
+		// index == features.length is already out of range, hence ">=".
+		if(features==null || index<0 || index>=features.length) return '0';
+		return features[index];
+	}
+	
+	public char[] getFeatures() {
+		return this.features;
+	}
+	
+	public void setCompounds(List<CompoundEntry> c) {
+		this.compounds = c;
+	}
+	
+	public List<CompoundEntry> getCompounds() {
+		return this.compounds;
+	}
+	
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordSpaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordSpaceAnalyzer.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordSpaceAnalyzer.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/morph/WordSpaceAnalyzer.java Sun May  5 03:39:51 2013
@@ -0,0 +1,589 @@
+package org.apache.lucene.analysis.kr.morph;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.kr.utils.DictionaryUtil;
+import org.apache.lucene.analysis.kr.utils.MorphUtil;
+import org.apache.lucene.analysis.kr.utils.SyllableUtil;
+import org.apache.lucene.analysis.kr.utils.VerbUtil;
+
+public class WordSpaceAnalyzer {
+
+	private MorphAnalyzer morphAnal;
+	
+	public WordSpaceAnalyzer() {
+		// setExactCompound(false) is applied before the analyzer is stored in the field.
+		MorphAnalyzer analyzer = new MorphAnalyzer();
+		analyzer.setExactCompound(false);
+		this.morphAnal = analyzer;
+	}
+	
+	public List<AnalysisOutput> analyze(String input)  throws MorphException {
+		// Walks the input syllable by syllable, builds candidate analyses for input[wStart..] and appends the validated best one to a WSOutput; returns its phrases.
+		List stack = new ArrayList(); // NOTE(review): never read in this method
+		
+		WSOutput output = new WSOutput();
+		
+		int wStart = 0; // start index of the phrase currently being built
+		
+		int sgCount = -9; // NOTE(review): assigned below but never read
+		
+		Map<Integer, Integer> fCounter = new HashMap(); // keyed by output.getLastEnd(): the scan index used for the last retry from that position
+		
+		for(int i=0;i<input.length();i++) {						
+			
+			char[] f = SyllableUtil.getFeature(input.charAt(i));
+			
+			String prefix = i==input.length()-1 ? "X" : input.substring(wStart,i+2);					// on the last syllable the fixed prefix "X" is looked up instead
+			Iterator iter = DictionaryUtil.findWithPrefix(prefix);
+			
+			List<AnalysisOutput> candidates = new ArrayList();		
+			
+			WordEntry entry = null;
+					
+			if(input.charAt(i)=='ì' || input.charAt(i)=='ì' || input.charAt(i)=='ì') {
+				addSingleWord(input.substring(wStart,i), candidates);
+				
+								
+			// (translated) If the next syllable still belongs to a dictionary word of two or more syllables and this
+			// is not the last syllable, this position is unlikely to be a word boundary. Single words such as
+			// adverbs or interjections could end here, but that case is decided when the next syllable is examined.
+			} else if(i!= input.length()-1 && iter.hasNext()) { 
+				// nothing to analyze yet; only the position is remembered
+				sgCount = i;
+			} else if(!iter.hasNext() && 
+					(entry=DictionaryUtil.getBusa(input.substring(wStart,i+1)))!=null) { 				
+				candidates.add(buildSingleOutput(entry));
+				
+			// (translated) the current syllable may be one that starts a josa or an eomi
+			} else if(f[SyllableUtil.IDX_EOGAN]=='1'||f[SyllableUtil.IDX_JOSA1]=='1'){				
+				if(f[SyllableUtil.IDX_JOSA1]=='1') 
+					candidates.addAll(anlysisWithJosa(input.substring(wStart), i-wStart));
+
+				if(f[SyllableUtil.IDX_EOGAN]=='1') 
+					candidates.addAll(anlysisWithEomi(input.substring(wStart), i-wStart));
+			}
+	
+			// order the candidates with WSOuputComparator
+			Collections.sort(candidates, new WSOuputComparator());
+			
+			// add the source of the first candidate as a single-word candidate where appendSingleWord allows it
+			appendSingleWord(candidates);
+			
+			// try compound-noun confirmation on candidates that are not yet scored as correct
+			analysisCompouns(candidates);
+			
+			// re-sort because the two steps above may have added candidates or changed scores
+			Collections.sort(candidates, new WSOuputComparator());			
+			
+			int reseult = validationAndAppend(output, candidates, input); // 1 = phrase appended, -1 = rejected, 0 = no candidates
+			if(reseult==1) {
+				i = output.getLastEnd()-1; // the loop increment moves i to the first syllable after the appended phrase
+				wStart = output.getLastEnd();
+			} else if(reseult==-1) {
+				Integer index = fCounter.get(output.getLastEnd());
+				if(index==null) index = output.getLastEnd();
+				else index = index + 1; // each further rejection from the same start pushes the scan one syllable further
+				i = index;
+				wStart = output.getLastEnd();
+				fCounter.put(output.getLastEnd(), index);				
+			}
+
+		}
+		
+		// whatever follows the last accepted phrase is emitted as one noun phrase
+		if(output.getLastEnd()<input.length()) {
+			
+			String source = input.substring(output.getLastEnd());
+			int score = DictionaryUtil.getWord(source)==null ? AnalysisOutput.SCORE_ANALYSIS : AnalysisOutput.SCORE_CORRECT;
+			AnalysisOutput o =new AnalysisOutput(source,null,null,PatternConstants.POS_NOUN,
+					PatternConstants.PTN_N,score);
+			
+			o.setSource(source);
+			output.getPhrases().add(o);
+			morphAnal.confirmCNoun(o);
+			
+		}
+
+		return output.getPhrases();
+	}
+	
+	/**
+	 * Analyzes a phrase that ends with a josa.
+	 * The phrase is the part of {@code snipt} up to the josa end reported by
+	 * {@link #findJosaEnd(String, int)}; every produced candidate gets that
+	 * phrase as its source string.
+	 *
+	 * @param snipt text starting at the current phrase start
+	 * @param js index in {@code snipt} where the josa is assumed to start
+	 * @return the candidates; empty when {@code js < 1} or no josa end is found
+	 * @throws MorphException propagated from the dictionary and analyzer calls
+	 */
+	private List<AnalysisOutput> anlysisWithJosa(String snipt, int js) throws MorphException {
+
+		List<AnalysisOutput> results = new ArrayList<AnalysisOutput>();
+		if(js<1) {
+			return results;
+		}
+
+		int josaEnd = findJosaEnd(snipt, js);
+		if(josaEnd==-1) {
+			return results; // no valid josa here
+		}
+
+		String phrase = snipt.substring(0, josaEnd);
+
+		// Move the stem/josa split point leftwards from the last syllable.
+		int split = phrase.length()-1;
+		while(split>0) {
+			char[] feature = SyllableUtil.getFeature(phrase.charAt(split));
+
+			if(feature[SyllableUtil.IDX_JOSA1]=='1') {
+				morphAnal.analysisWithJosa(phrase.substring(0, split), phrase.substring(split), results);
+			}
+
+			// Stop as soon as the syllable at the split point has its JOSA2 flag cleared.
+			if(feature[SyllableUtil.IDX_JOSA2]=='0') {
+				break;
+			}
+			split--;
+		}
+
+		if(phrase.length()==1) {
+			results.add(new AnalysisOutput(phrase,null,null,PatternConstants.POS_NOUN,
+					 PatternConstants.PTN_N,AnalysisOutput.SCORE_ANALYSIS));
+		}
+
+		fillSourceString(phrase, results);
+
+		return results;
+	}
+	
+	/**
+	 * Finds where the josa that starts at jstart ends (exclusive index into snipt).
+	 * NOTE(review): the Hangul character literals in this method appear mis-encoded in this listing.
+	 * @param snipt text starting at the current phrase start
+	 * @param jstart index of the first josa syllable; jstart-1 is read, so it must be at least 1
+	 * @return index just past the longest josa accepted below, or -1 when none is accepted
+	 * @throws MorphException propagated from the syllable and dictionary lookups
+	 */
+	private int findJosaEnd(String snipt, int jstart) throws MorphException {
+		
+		int jend = jstart;
+
+		// special case on the syllables at jstart-1 and jstart: treat the single syllable at jstart as the josa
+		if(snipt.charAt(jstart-1)=='ê²'&&(snipt.charAt(jstart)=='ì')) return jstart+1;
+		
+		if(snipt.length()>jstart+2&&snipt.charAt(jstart+1)=='ì¤') { // reject when the syllable at jstart+2 decomposes to the two given jamo
+			char[] chrs = MorphUtil.decompose(snipt.charAt(jstart+2));
+
+			if(chrs.length>=2&&chrs[0]=='ã¹'&&chrs[1]=='ã') return -1;
+		}
+		
+		// extend jend over following syllables until one has its JOSA2 flag cleared
+		for(int i=jstart+1;i<snipt.length();i++) {
+			char[] f = SyllableUtil.getFeature(snipt.charAt(i));
+			if(f[SyllableUtil.IDX_JOSA2]=='0') break;
+			jend = i;				
+		}
+				
+		int start = jend; // then shrink from the longest span until the span is a known josa
+		boolean hasJosa = false;
+		for(int i=start;i>=jstart;i--) {
+			String str = snipt.substring(jstart,i+1);
+			if(DictionaryUtil.existJosa(str) && !findNounWithinStr(snipt,i,i+2) &&
+					!isNounPart(snipt,jstart)) {
+				jend = i;
+				hasJosa = true;
+				break;
+			}
+		}
+
+		if(!hasJosa) return -1;
+		
+		return jend+1;
+		
+	}
+	
+	/**
+	 * Stores the source string on every candidate so that it is available
+	 * for later computation and for showing the original text.
+	 * @param source the original text the candidates were derived from
+	 * @param candidates the candidates to update
+	 */
+	private void fillSourceString(String source, List<AnalysisOutput> candidates) {
+
+		Iterator<AnalysisOutput> each = candidates.iterator();
+		while(each.hasNext()) {
+			each.next().setSource(source);
+		}
+
+	}
+	
+	/**
+	 * Adds the source of the first candidate as a single-word candidate.
+	 * A dictionary hit (verbs excluded) is added directly; otherwise a noun
+	 * candidate is added only if compound-noun confirmation scores it as correct.
+	 * @param candidates sorted candidates; the first one supplies the source text
+	 */
+	private void appendSingleWord(List<AnalysisOutput> candidates) throws MorphException {
+
+		if(candidates.size()==0) {
+			return;
+		}
+
+		AnalysisOutput first = candidates.get(0);
+		String source = first.getSource();
+
+		WordEntry found = DictionaryUtil.getWordExceptVerb(source);
+		if(found!=null) {
+			candidates.add(buildSingleOutput(found));
+			return;
+		}
+
+		// Patterns in the range (PTN_VM, PTN_VMXMJ] never get a noun reading.
+		boolean inSkippedRange = first.getPatn()>PatternConstants.PTN_VM
+				&& first.getPatn()<=PatternConstants.PTN_VMXMJ;
+		if(inSkippedRange) {
+			return;
+		}
+
+		// Sources shorter than five characters are not tried as compounds.
+		if(source.length()<5) {
+			return;
+		}
+
+		AnalysisOutput noun = new AnalysisOutput(source,null,null,PatternConstants.POS_NOUN,
+				 PatternConstants.PTN_N,AnalysisOutput.SCORE_ANALYSIS);
+		noun.setSource(source);
+		morphAnal.confirmCNoun(noun);
+		if(noun.getScore()==AnalysisOutput.SCORE_CORRECT) {
+			candidates.add(noun);
+		}
+	}
+	
+	private void addSingleWord(String source, List<AnalysisOutput> candidates) throws MorphException {
+
+		// A dictionary hit (verbs excluded) becomes the candidate as is.
+		WordEntry found = DictionaryUtil.getWordExceptVerb(source);
+		if(found!=null) {
+			candidates.add(buildSingleOutput(found));
+			return;
+		}
+
+		// Otherwise add a noun candidate after running compound-noun confirmation,
+		// whatever score that confirmation leaves on it.
+		AnalysisOutput noun = new AnalysisOutput(source,null,null,PatternConstants.POS_NOUN,
+				 PatternConstants.PTN_N,AnalysisOutput.SCORE_ANALYSIS);
+		noun.setSource(source);
+		morphAnal.confirmCNoun(noun);
+		candidates.add(noun);
+
+	}
+	
+	private List anlysisWithEomi(String snipt, int estart) throws MorphException {
+		// Analyzes snipt as [optional leading noun] + verb stem + eomi, where the eomi is assumed to start at estart; returns the candidates.
+		List<AnalysisOutput> candidates = new ArrayList();
+		
+		int eend = findEomiEnd(snipt,estart);		
+
+		// separate a leading noun: move vstart left while snipt[i..estart) is still a dictionary prefix
+		int vstart = 0;
+		for(int i=estart-1;i>=0;i--) {	
+			Iterator iter = DictionaryUtil.findWithPrefix(snipt.substring(i,estart)); 
+			if(iter.hasNext()) vstart=i;
+			else break;
+		}
+			
+		if(snipt.length()>eend &&
+				DictionaryUtil.findWithPrefix(snipt.substring(vstart,eend+1)).hasNext()) 
+			return candidates;	// the span plus the next syllable is still a dictionary prefix: do not split here
+		
+		String pvword = null; // text before the verb part, if any
+		if(vstart!=0) pvword = snipt.substring(0,vstart);
+			
+		while(true) { // the eomi end may have been placed too far right; shrink it by one syllable per pass while the first branch applies
+			String input = snipt.substring(vstart,eend);
+			anlysisWithEomiDetail(input, candidates);				// NOTE(review): candidates is not cleared between passes
+			if(candidates.size()==0) break;		
+			if(("ã¹".equals(candidates.get(0).getEomi()) ||
+					"ã".equals(candidates.get(0).getEomi()) ||
+					"ã´".equals(candidates.get(0).getEomi())) &&
+					eend>estart+1 && candidates.get(0).getPatn()!=PatternConstants.PTN_VM &&
+					candidates.get(0).getPatn()!=PatternConstants.PTN_NSM
+					) {
+				eend--;
+			}else if(pvword!=null&&candidates.get(0).getPatn()>=PatternConstants.PTN_VM&& // leading text + verb phrase whose span is itself a dictionary word: give up
+					candidates.get(0).getPatn()<=PatternConstants.PTN_VMXMJ && DictionaryUtil.getWord(input)!=null){
+				candidates.clear();
+				break;
+			}else if(pvword!=null&&VerbUtil.verbSuffix(candidates.get(0).getStem())
+					&&DictionaryUtil.getNoun(pvword)!=null){ // noun + verb-forming suffix + eomi: re-analyze from the start of snipt
+				candidates.clear();
+				anlysisWithEomiDetail(snipt.substring(0,eend), candidates);
+				pvword=null;
+				break;				
+			} else {
+				break;
+			}
+		}
+						
+		if(candidates.size()>0&&pvword!=null) {
+			AnalysisOutput o =new AnalysisOutput(pvword,null,null,PatternConstants.POS_NOUN,
+					PatternConstants.PTN_N,AnalysisOutput.SCORE_ANALYSIS);	
+			morphAnal.confirmCNoun(o);
+			
+			List<CompoundEntry> cnouns = o.getCNounList();
+			if(cnouns.size()==0) {
+				boolean is = DictionaryUtil.getWordExceptVerb(pvword)!=null;
+				cnouns.add(new CompoundEntry(pvword,0,is));
+			} 
+			
+			for(AnalysisOutput candidate : candidates) {
+				candidate.getCNounList().addAll(cnouns);
+				candidate.getCNounList().add(new CompoundEntry(candidate.getStem(),0,true));
+				candidate.setStem(pvword+candidate.getStem()); // (translated) needed so that WSOutput handles the compound noun correctly
+			}
+			
+		}
+
+		fillSourceString(snipt.substring(0,eend), candidates);
+	
+		return candidates;
+	}
+	
+	private void anlysisWithEomiDetail(String input, List<AnalysisOutput> candidates ) 
+	throws MorphException {
+
+		int length = input.length();
+
+		// First try the whole input with an empty eomi when the last syllable
+		// carries one of the YNPNA / YNPLA / YNPMA flags.
+		char[] lastFeature = SyllableUtil.getFeature(input.charAt(length-1));
+		boolean emptyEomiPossible = lastFeature[SyllableUtil.IDX_YNPNA]=='1'
+				|| lastFeature[SyllableUtil.IDX_YNPLA]=='1'
+				|| lastFeature[SyllableUtil.IDX_YNPMA]=='1';
+		if(emptyEomiPossible) {
+			morphAnal.analysisWithEomi(input,"",candidates);
+		}
+
+		// Then move the stem/eomi split point leftwards from the last syllable.
+		int split = length-1;
+		while(split>0) {
+			char[] feature = SyllableUtil.getFeature(input.charAt(split));
+
+			morphAnal.analysisWithEomi(input.substring(0,split), input.substring(split), candidates);
+
+			// Stop once the syllable at the split point has its EOMI2 flag cleared.
+			if(feature[SyllableUtil.IDX_EOMI2]=='0') {
+				break;
+			}
+			split--;
+		}
+
+	}
+	
+	/**
+	 * Finds where the eomi that starts at estart ends (exclusive index into snipt).
+	 * NOTE(review): the Hangul character literals in this method appear mis-encoded in this listing.
+	 * @param snipt text starting at the current phrase start
+	 * @param estart index of the syllable where the eomi is assumed to start
+	 * @return estart + jend + 1, where jend stays 0 when no longer eomi is accepted below
+	 * @throws MorphException propagated from the syllable and dictionary lookups
+	 */
+	private int findEomiEnd(String snipt, int estart) throws MorphException {
+		
+		int jend = 0;
+		
+		String tail = null; // text from estart on; the first syllable is replaced when it has one of three final jamo
+		char[] chr = MorphUtil.decompose(snipt.charAt(estart));
+		if(chr.length==3 && (chr[2]=='ã´')) {
+			tail = 'ì'+snipt.substring(estart+1);
+		}else if(chr.length==3 && (chr[2]=='ã¹')) {
+			tail = 'ì'+snipt.substring(estart+1);			
+		}else if(chr.length==3 && (chr[2]=='ã')) {
+			tail = 'ìµ'+snipt.substring(estart+1);
+		}else {
+			tail = snipt.substring(estart);
+		}				
+
+		// extend over following syllables until one has its EOGAN flag cleared
+		int start = 0;
+		for(int i=1;i<tail.length();i++) {
+			char[] f = SyllableUtil.getFeature(tail.charAt(i));	
+			if(f[SyllableUtil.IDX_EOGAN]=='0') break;
+			start = i;				
+		}
+					
+		for(int i=start;i>0;i--) { // index 0 is never tested, so at least one syllable is always returned
+			String str = tail.substring(0,i+1);	
+			char[] chrs = MorphUtil.decompose(tail.charAt(i));	
+			if(DictionaryUtil.existEomi(str) || 
+					(i<2&&chrs.length==3&&(chrs[2]=='ã¹'||chrs[2]=='ã'||chrs[2]=='ã´'))) { // also accept a second syllable that ends in one of the three final jamo
+				jend = i;
+				break;
+			}
+		}
+		
+		return estart+jend+1;
+		
+	}
+	
+	/**
+	 * Validates the first candidate against the previously accepted phrase and the following syllable, then appends it to output.
+	 * NOTE(review): the Hangul literals in this method appear mis-encoded in this listing.
+	 * @param output accumulated result; its last phrase may be removed when the candidate is rejected
+	 * @param candidates sorted candidates; the first (sometimes also the second) element is removed
+	 * @param input the complete input; returns 1 when appended, -1 when rejected, 0 when candidates is empty
+	 */
+	private int validationAndAppend(WSOutput output, List<AnalysisOutput> candidates, String input)
+	throws MorphException {
+		
+		if(candidates.size()==0) return 0;
+		
+		AnalysisOutput o = candidates.remove(0);		
+		AnalysisOutput po = output.getPhrases().size()>0 ?  output.getPhrases().get(output.getPhrases().size()-1) : null;
+		
+		String ejend = o.getSource().substring(o.getStem().length()); // part of the candidate's source after its stem
+		
+		char[] chrs = po!=null&&po.getStem().length()>0 ? MorphUtil.decompose(po.getStem().charAt(po.getStem().length()-1)) : null; // NOTE(review): not read afterwards
+		String pjend = po!=null&&po.getStem().length()>0 ? po.getSource().substring(po.getStem().length()) : null; // NOTE(review): not read afterwards
+		
+		char ja = 'x'; // arbitrary placeholder; replaced by the last jamo of the previous phrase's eomi for the patterns below
+		if(po!=null&&(po.getPatn()==PatternConstants.PTN_VM||po.getPatn()==PatternConstants.PTN_VMCM||po.getPatn()==PatternConstants.PTN_VMXM)) {		
+			char[] chs = MorphUtil.decompose(po.getEomi().charAt(po.getEomi().length()-1));
+			if(chs.length==3) ja=chs[2];
+			else if(chs.length==1) ja=chs[0];			
+		}
+		
+		int nEnd = output.getLastEnd()+o.getSource().length(); // input index just past this candidate
+		
+		char[] f = nEnd<input.length() ? SyllableUtil.getFeature(input.charAt(nEnd)) : null;			// features of the following syllable, null at end of input
+		
+		// after a noun phrase, prefer a noun candidate over a PTN_VM candidate when the next candidate is a noun
+		if(po!=null&&po.getPatn()==PatternConstants.PTN_N&&candidates.size()>0&&  
+				o.getPatn()==PatternConstants.PTN_VM&&candidates.get(0).getPatn()==PatternConstants.PTN_N) {
+			o = candidates.remove(0); 			
+		}else if(po!=null&&po.getPatn()>=PatternConstants.PTN_VM&&candidates.size()>0&&
+				candidates.get(0).getPatn()==PatternConstants.PTN_N&&
+				(ja=='ã´'||ja=='ã¹')) { // previous eomi ends in one of the two jamo and the next candidate is a noun: take that one
+			o = candidates.remove(0);
+		}
+		
+		//=============================================
+		if(o.getPos()==PatternConstants.POS_NOUN && MorphUtil.hasVerbOnly(o.getStem())) {		
+			output.removeLast();		
+			return -1;
+		}else if(nEnd<input.length() && f[SyllableUtil.IDX_JOSA1]=='1' 
+			&& DictionaryUtil.getNoun(o.getSource())!=null) {
+			return -1;
+		}else if(nEnd<input.length() && o.getScore()==AnalysisOutput.SCORE_ANALYSIS 
+			&& DictionaryUtil.findWithPrefix(ejend+input.charAt(nEnd)).hasNext()) { // ending plus next syllable is still a dictionary prefix: reject
+			return -1;	
+		}else if(po!=null&&po.getPatn()==PatternConstants.PTN_VM&&"ã".equals(po.getEomi())&&
+				o.getStem().equals("í")) { // previous PTN_VM phrase with this eomi followed by this stem must not be split apart
+			output.removeLast();
+			return -1;	
+		}else if(po!=null&&po.getPatn()==PatternConstants.PTN_N&&VerbUtil.verbSuffix(o.getStem())&&
+				!"ì".equals(o.getStem())) { // noun followed by a verb-forming suffix stem (one stem excepted): undo the noun and retry
+			output.removeLast();
+			return -1;			
+		} else {	
+			output.addPhrase(o);				
+		}
+				
+		return 1;
+	}
+	
+	
+	private AnalysisOutput buildSingleOutput(WordEntry entry) {
+		
+		char pos = PatternConstants.POS_NOUN;
+		
+		int ptn = PatternConstants.PTN_N;
+		
+		if(entry.getFeature(WordEntry.IDX_NOUN)=='0') {
+			pos = PatternConstants.POS_AID;
+			ptn = PatternConstants.PTN_AID;
+		}
+		
+		AnalysisOutput o = new AnalysisOutput(entry.getWord(),null,null,pos,
+				ptn,AnalysisOutput.SCORE_CORRECT);
+		
+		o.setSource(entry.getWord());
+		
+		return o;
+	}
+	
+	private void analysisCompouns(List<AnalysisOutput> candidates) throws MorphException {
+		// Runs compound-noun confirmation on candidates that are not yet scored as correct.
+		// decide per candidate whether compound decomposition is attempted
+		boolean changed = false; // NOTE(review): written below but never read
+		boolean correct = false; // true once a correct candidate other than PTN_NJ has been seen
+		for(AnalysisOutput o:candidates) {
+			
+			if(o.getScore()==AnalysisOutput.SCORE_CORRECT) {
+				if(o.getPatn()!=PatternConstants.PTN_NJ) correct=true;
+				// a correct analysis with this verb suffix ends the scan, so later candidates are not decomposed
+				if("í".equals(o.getVsfx())) break; 
+				continue;
+			}
+
+			if(o.getPatn()<=PatternConstants.PTN_VM&&o.getStem().length()>2) {
+				 if(!(correct&&o.getPatn()==PatternConstants.PTN_N)) morphAnal.confirmCNoun(o);
+				 if(o.getScore()==AnalysisOutput.SCORE_CORRECT) changed=true;
+			}
+		}
+		
+	}
+	
+	/**
+	 * Checks whether the text between {@code ws} and the first syllable at or
+	 * after {@code es} that has its JOSA1 flag set is a dictionary word.
+	 * @param str	the whole string being analyzed
+	 * @param ws	start position of the word to look up
+	 * @param es	position from which the JOSA1 syllable is searched
+	 * @return true if such a syllable exists and the text before it is in the dictionary
+	 * @throws MorphException propagated from the syllable and dictionary lookups
+	 */
+	private boolean findNounWithinStr(String str, int ws, int es) throws MorphException {
+
+		if(es>str.length()) {
+			return false;
+		}
+
+		int pos = es;
+		while(pos<str.length()) {
+			char[] feature = SyllableUtil.getFeature(str.charAt(pos));
+			// Only the first JOSA1 syllable is considered; the lookup result decides.
+			if(feature[SyllableUtil.IDX_JOSA1]=='1') {
+				return DictionaryUtil.getWord(str.substring(ws,pos))!=null;
+			}
+			pos++;
+		}
+
+		// No JOSA1 syllable before the end of the string.
+		return false;
+	}
+	
+	private boolean isNounPart(String str, int jstart) throws MorphException  {
+		// The check is switched off: the unconditional return below means this method always returns false.
+		if(true) return false;
+		// Disabled logic: true when some str[i..jstart] (i < jstart, jstart inclusive) is a dictionary word other than a verb.
+		for(int i=jstart-1;i>=0;i--) {			
+			if(DictionaryUtil.getWordExceptVerb(str.substring(i,jstart+1))!=null)
+				return true;
+			
+		}
+		
+		
+		return false;
+		
+	}
+	
+	private void printCandidate(WSOutput output) {
+
+		// Debug helper: prints every phrase with its score on one line of standard output.
+		List<AnalysisOutput> phrases = output.getPhrases();
+		Iterator<AnalysisOutput> each = phrases.iterator();
+		while(each.hasNext()) {
+			AnalysisOutput phrase = each.next();
+			System.out.print(phrase.toString()+"("+phrase.getScore()+")| ");
+		}
+		System.out.println("<==");
+
+	}	
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/tagging/Tagger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/tagging/Tagger.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/tagging/Tagger.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/tagging/Tagger.java Sun May  5 03:39:51 2013
@@ -0,0 +1,317 @@
+package org.apache.lucene.analysis.kr.tagging;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.analysis.kr.morph.AnalysisOutput;
+import org.apache.lucene.analysis.kr.morph.MorphException;
+import org.apache.lucene.analysis.kr.morph.PatternConstants;
+import org.apache.lucene.analysis.kr.utils.ConstraintUtil;
+import org.apache.lucene.analysis.kr.utils.FileUtil;
+import org.apache.lucene.analysis.kr.utils.KoreanEnv;
+import org.apache.lucene.analysis.kr.utils.StringUtil;
+import org.apache.lucene.analysis.kr.utils.Trie;
+
+
+/**
+ * Selects the best candidate among several morphological analysis results.
+ * It is meant to be called sentence by sentence.
+ */
+public class Tagger {
+		
+	private static Trie<String, String[]> occurrences; // grammar rules; created by loadTaggerDic() on first use and queried through getGR()
+	
+	private static final String tagDicLoc = "tagger.dic"; // key passed to KoreanEnv.getValue() to locate the dictionary file
+	
+	private static final String NILL = "NILL"; // rule field value that checkGrammer treats as "no constraint" for words and endings
+	
+	private static final String NOPATN = "0"; // rule field value that checkGrammer treats as "no constraint" for patterns
+	
+	private AnalysisOutput po; // result of the latest tagging() call; used as the preceding word by lookupBestByPWord
+	
+	public AnalysisOutput tagging(String psource, List<AnalysisOutput> pmorphs)  throws MorphException {
+
+		// No second word is supplied: delegate with null for its text and its analyses.
+		final String noSecondSource = null;
+		final List<AnalysisOutput> noSecondMorphs = null;
+
+		return tagging(psource, noSecondSource, pmorphs, noSecondMorphs);
+
+	}
+	
+	/**
+	 * Picks the best analysis of {@code psource} and remembers it for the next call.
+	 *
+	 * @param psource text of the word being tagged; stored as the source of the result
+	 * @param rsource text of the second word, may be null
+	 * @param pmorphs candidate analyses of {@code psource}
+	 * @param rmorphs candidate analyses of {@code rsource}, may be null or empty
+	 * @return the selected element of {@code pmorphs}, or null when {@code pmorphs}
+	 *         is null or empty (the remembered previous result is left untouched then)
+	 * @throws MorphException if the tagger dictionary cannot be read
+	 */
+	public AnalysisOutput tagging(String psource, String rsource, List<AnalysisOutput> pmorphs, List<AnalysisOutput> rmorphs)  throws MorphException {
+
+		// lookupBest always selects from pmorphs, so there is nothing to choose
+		// from without it, whatever rmorphs contains. The former guard only
+		// returned null when rmorphs was empty as well, and otherwise failed
+		// inside lookupBest with a NullPointerException or IndexOutOfBoundsException.
+		if(pmorphs==null||pmorphs.isEmpty()) return null;
+	
+		po = lookupBest(psource, rsource, pmorphs, rmorphs);
+		
+		po.setSource(psource);
+
+		return po;
+		
+	}
+	
+	/**
+	 * Selects one analysis out of {@code pmorphs}.
+	 * A single candidate is returned as is. Otherwise the rules keyed on the
+	 * second word are tried first (when {@code rmorphs} is present), then the
+	 * rules keyed on the previously tagged word (when there is one); the first
+	 * candidate is the fallback.
+	 * @return the selected analysis, never null for a non-empty {@code pmorphs}
+	 */
+	private AnalysisOutput lookupBest(String psource,String rsource, List<AnalysisOutput> pmorphs, List<AnalysisOutput> rmorphs)  throws MorphException {
+
+		if(pmorphs.size()==1) {
+			return pmorphs.get(0);
+		}
+
+		if(rmorphs!=null && rmorphs.size()!=0) {
+			AnalysisOutput bySecondWord = lookupBestByRWord(psource, rsource, pmorphs, rmorphs);
+			if(bySecondWord!=null) {
+				return bySecondWord;
+			}
+		}
+
+		if(po!=null) {
+			AnalysisOutput byPreviousWord = lookupBestByPWord(psource, pmorphs);
+			if(byPreviousWord!=null) {
+				return byPreviousWord;
+			}
+		}
+
+		return pmorphs.get(0);
+	}
+	
+	/**
+	 * Decides the current word with the help of the previously tagged word
+	 * ({@code po}), which must not be null.
+	 * For every candidate the "F" rules keyed on the surface form and then on
+	 * the stem are tried. Analyses collected for removal by selectBest are
+	 * dropped from {@code rmorphs} while more than one candidate is left.
+	 * @param rsource text of the current word
+	 * @param rmorphs candidate analyses of the current word
+	 * @return the candidate accepted by a rule, or null if no rule matched
+	 * @throws MorphException if the tagger dictionary cannot be read
+	 */
+	private AnalysisOutput lookupBestByPWord(String rsource, List<AnalysisOutput> rmorphs)  throws MorphException {
+
+		List<AnalysisOutput> rejected = new ArrayList<AnalysisOutput>();
+
+		Iterator<AnalysisOutput> each = rmorphs.iterator();
+		while(each.hasNext()) {
+			AnalysisOutput current = each.next();
+
+			// rules keyed on the surface form of the current word
+			Iterator<String[]> surfaceRules = getGR("F"+rsource+"^W");
+			AnalysisOutput chosen = selectBest(surfaceRules, po.getSource(), rsource, po, current, true, rejected);
+			if(chosen!=null) {
+				return chosen;
+			}
+
+			// rules keyed on the stem of this candidate
+			Iterator<String[]> stemRules = getGR("F"+current.getStem()+"^S");
+			chosen = selectBest(stemRules, po.getSource(), rsource, po, current, true, rejected);
+			if(chosen!=null) {
+				return chosen;
+			}
+		}
+
+		// always keep at least one candidate
+		for(int k=0;k<rejected.size();k++) {
+			if(rmorphs.size()>1) {
+				rmorphs.remove(rejected.get(k));
+			}
+		}
+
+		return null;
+
+	}
+	
+	/**
+	 * Decides the current word with the help of the second word, whose
+	 * analyses ({@code rmorphs}) must not be null.
+	 * For every pair of analyses the "R" rules keyed on the surface form, on
+	 * the ending and on the stem of the current word are tried in that order.
+	 * Analyses collected for removal by selectBest are dropped from
+	 * {@code pmorphs} while more than one candidate is left.
+	 * @param psource text of the current word
+	 * @param rsource text of the second word
+	 * @param pmorphs candidate analyses of the current word
+	 * @param rmorphs candidate analyses of the second word; scanning stops at
+	 *        the first one that is not scored SCORE_CORRECT
+	 * @return the candidate accepted by a rule, or null if no rule matched
+	 * @throws MorphException if the tagger dictionary cannot be read
+	 */
+	private AnalysisOutput lookupBestByRWord(String psource, String rsource, List<AnalysisOutput> pmorphs, List<AnalysisOutput> rmorphs)  throws MorphException {
+		
+		List<AnalysisOutput> removes = new ArrayList<AnalysisOutput>();
+		
+		for(AnalysisOutput rmorph : rmorphs) {
+			
+			if(rmorph.getScore()!=AnalysisOutput.SCORE_CORRECT) break;
+			
+			for(AnalysisOutput pmorph : pmorphs) {						
+			
+				// rules keyed on the surface form of the current word
+				Iterator<String[]> iterw = getGR("R"+psource+"^W/");
+				
+				// ending of the current word: its josa, or its eomi when there is no josa
+				String pend = pmorph.getJosa();
+				if(pend==null) pend = pmorph.getEomi();
+				
+				AnalysisOutput best = selectBest(iterw, psource, rsource, pmorph, rmorph, false, removes);
+				if(best!=null) return best;	
+								
+				// rules keyed on the ending only
+				Iterator<String[]> iters = getGR("R"+NILL+"/"+pend+"/");	
+				best = selectBest(iters, psource, rsource, pmorph, rmorph, false, removes);
+				if(best!=null) return best;	
+				
+				// rules keyed on the stem
+				iters = getGR("R"+pmorph.getStem()+"^S/");	
+				best = selectBest(iters, psource, rsource, pmorph, rmorph, false, removes);
+				if(best!=null) return best;					
+				
+			}
+						
+		}		
+		
+		// always keep at least one candidate
+		for(AnalysisOutput morph : removes) {
+			if(pmorphs.size()>1) pmorphs.remove(morph);
+		}
+		
+		return null;
+		
+	}
+	
+	/**
+	 * Runs the given rules against one pair of analyses.
+	 * Returns {@code rmorph} (when {@code rear} is true) or {@code pmorph} as
+	 * soon as a rule is satisfied. When a failing rule has "1" in its seventh
+	 * field, {@code pmorph} is recorded in {@code removes} and the scan stops.
+	 * @return the accepted analysis, or null if no rule was satisfied
+	 */
+	private AnalysisOutput selectBest(Iterator<String[]> iter, String psource, String rsource, 
+			AnalysisOutput pmorph, AnalysisOutput rmorph, boolean rear, List removes) {
+
+		while(iter.hasNext()) {
+
+			String[] rule = iter.next();
+
+			if(checkGrammer(rule, psource, rsource, pmorph, rmorph, rear)) {
+				return rear ? rmorph : pmorph;
+			}
+
+			if(!"1".equals(rule[6])) {
+				continue;
+			}
+
+			if(!removes.contains(pmorph)) {
+				removes.add(pmorph);
+			}
+			return null;
+		}
+
+		return null;
+
+	}
+	
+	/**
+	 * Tests one rule against a pair of analyses.
+	 * Fields 0-2 of {@code values} constrain the first word (word, ending,
+	 * pattern) and fields 3-5 the second word. NILL, or NOPATN for patterns,
+	 * means "no constraint". The word field of the first word is only checked
+	 * when {@code depFront} is true, that of the second word only when it is false.
+	 * @return true if every applicable constraint holds
+	 */
+	private boolean checkGrammer(String[] values, String psource, String rsource, AnalysisOutput pmorph, AnalysisOutput rmorph, boolean depFront) {
+
+		// ending = josa, or the eomi when there is no josa
+		String frontEnding = pmorph.getJosa();
+		if(frontEnding==null) frontEnding = pmorph.getEomi();
+
+		String rearEnding = rmorph.getJosa();
+		if(rearEnding==null) rearEnding = rmorph.getEomi();
+
+		// The constraints are evaluated left to right and stop at the first failure.
+		return (!depFront || NILL.equals(values[0]) || checkWord(psource,values[0],pmorph)) // word of the first word
+			&& (NILL.equals(values[1]) || checkEomi(values[1], frontEnding)) // ending of the first word
+			&& (NOPATN.equals(values[2]) || checkPattern(values[2], pmorph.getPatn())) // pattern of the first word
+			&& (depFront || NILL.equals(values[3]) || checkWord(rsource,values[3],rmorph)) // word of the second word
+			&& (NILL.equals(values[4]) || checkEomi(values[4], rearEnding)) // ending of the second word
+			&& (NOPATN.equals(values[5]) || checkPattern(values[5], rmorph.getPatn())); // pattern of the second word
+
+	}
+	
+	/**
+	 * Checks a word constraint of the form {@code w1,w2,...^T}.
+	 * With type "S" the stem of {@code morph} is compared, otherwise {@code source}.
+	 * @return true if the compared text equals one of the listed words
+	 */
+	private boolean checkWord(String source, String value, AnalysisOutput morph) {
+
+		String[] parts = StringUtil.split(value,"^");
+		String[] allowed = StringUtil.split(parts[0],",");
+
+		String target = "S".equals(parts[1]) ? morph.getStem() : source;
+
+		for(String word : allowed) {
+			if(word.equals(target)) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+	
+	/**
+	 * Checks whether {@code rend} equals one of the comma-separated endings in {@code value}.
+	 */
+	private boolean checkEomi(String value, String rend) {
+
+		for(String ending : StringUtil.split(value,",")) {
+			if(ending.equals(rend)) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+	
+	/**
+	 * Checks whether pattern {@code ptn} satisfies one of the comma-separated
+	 * entries of {@code value}: "E" accepts every eomi phrase, "J" every josa
+	 * noun phrase as well as PTN_N, and any other entry must equal the pattern number.
+	 */
+	private boolean checkPattern(String value, int ptn) {
+
+		String[] entries = StringUtil.split(value,",");
+		String number = String.valueOf(ptn);
+
+		for(String entry : entries) {
+
+			boolean matches =
+					("E".equals(entry) && ConstraintUtil.isEomiPhrase(ptn))
+					|| ("J".equals(entry)
+							&& (ConstraintUtil.isJosaNounPhrase(ptn) || ptn==PatternConstants.PTN_N))
+					|| entry.equals(number);
+
+			if(matches) {
+				return true;
+			}
+
+		}
+
+		return false;
+	}
+	
+	public static synchronized Iterator<String[]> getGR(String prefix) throws MorphException {
+
+		if(occurrences==null) loadTaggerDic();
+		
+		return occurrences.getPrefixedBy(prefix);
+	}
+	
+	/**
+	 * Reads the tagger dictionary and publishes it in {@code occurrences}.
+	 * Each usable line has four ':'-separated fields. The key is the first
+	 * field followed by parts of the second and third fields (their order
+	 * depends on whether the first field is "F"). The stored value is fields
+	 * two to four split on '/'. Lines with another field count are skipped.
+	 * The trie is built locally and only assigned once the whole file has been
+	 * read, so a failed load leaves {@code occurrences} unchanged and the next
+	 * getGR() call retries instead of silently using a partial dictionary.
+	 * @throws MorphException if the dictionary cannot be read or parsed
+	 */
+	private static synchronized void loadTaggerDic() throws MorphException {
+		
+		Trie<String, String[]> loaded = new Trie<String, String[]>(true);
+		
+		try {
+			
+			List<String> strs = FileUtil.readLines(KoreanEnv.getInstance().getValue(tagDicLoc), "UTF-8");
+			
+			for(String str : strs) {
+				if(str==null) continue;
+				str = str.trim();
+				String[] syls = StringUtil.split(str,":");
+				if(syls.length!=4) continue;
+				
+				String key = null;				
+				if("F".equals(syls[0])) key = syls[2].substring(0,syls[2].lastIndexOf("/")+1) + syls[1].substring(0,syls[1].lastIndexOf("/"));
+				else key = syls[1].substring(0,syls[1].lastIndexOf("/")+1) + syls[2].substring(0,syls[2].lastIndexOf("/"));
+				
+				String[] patns = StringUtil.split(syls[1]+"/"+syls[2]+"/"+syls[3],"/");
+				
+				loaded.add(syls[0]+key, patns);
+				
+			}			
+			
+		} catch (Exception e) {
+			throw new MorphException("Fail to read the tagger dictionary.("+tagDicLoc+")\n"+e.getMessage());
+		}
+		
+		// publish only a completely loaded dictionary
+		occurrences = loaded;
+	}
+		
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/ConstraintUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/ConstraintUtil.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/ConstraintUtil.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/ConstraintUtil.java Sun May  5 03:39:51 2013
@@ -0,0 +1,165 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.kr.morph.PatternConstants;
+
+/**
+ * Utility class that evaluates the conditions under which morphemes may be combined.
+ */
+public class ConstraintUtil {
+
+	private static Map hahes = new HashMap(); // "ê¸ë¡ë²íí´ ", "ë¯¼ì¡±íí´" ì²ë¼ íí´ì ê²°í©ì´ ê°ë¥í ëªì¬
+	static {
+		hahes.put("ë¯¼ì¡±", "Y");hahes.put("ëì", "Y");hahes.put("ë¨ë¶", "Y");
+	}
+	
+	private static Map eomiPnouns = new HashMap(); 
+	static {
+		eomiPnouns.put("ã´", "Y");eomiPnouns.put("ã¹", "Y");eomiPnouns.put("ã", "Y");
+	}
+	
+	private static Map PTN_MLIST= new HashMap();
+	static {
+		PTN_MLIST.put(PatternConstants.PTN_NSM, PatternConstants.PTN_NSM);
+		PTN_MLIST.put(PatternConstants.PTN_NSMXM, PatternConstants.PTN_NSMXM);
+		PTN_MLIST.put(PatternConstants.PTN_NJCM, PatternConstants.PTN_NJCM);
+		PTN_MLIST.put(PatternConstants.PTN_VM, PatternConstants.PTN_VM);
+		PTN_MLIST.put(PatternConstants.PTN_VMCM, PatternConstants.PTN_VMCM);
+		PTN_MLIST.put(PatternConstants.PTN_VMXM, PatternConstants.PTN_VMXM);
+		PTN_MLIST.put(PatternConstants.PTN_NVM, PatternConstants.PTN_NVM);
+	}
+	
+	private static Map PTN_JLIST= new HashMap();
+	static {
+		PTN_JLIST.put(PatternConstants.PTN_NJ, PatternConstants.PTN_NJ);
+		PTN_JLIST.put(PatternConstants.PTN_NSMJ, PatternConstants.PTN_NSMJ);
+		PTN_JLIST.put(PatternConstants.PTN_VMJ, PatternConstants.PTN_VMJ);
+		PTN_JLIST.put(PatternConstants.PTN_VMXMJ, PatternConstants.PTN_VMXMJ);
+	}
+	
+	private static Map WORD_GUKS= new HashMap();
+	static {
+		WORD_GUKS.put("ë ê²", "Y");
+		WORD_GUKS.put("ë¤ê²", "Y");
+		WORD_GUKS.put("ë³ê²", "Y");
+		WORD_GUKS.put("ì°°ê²", "Y");
+		WORD_GUKS.put("íê²", "Y");
+		WORD_GUKS.put("íìê²", "Y");
+	}
+	
+	// ì¢ì±ì´ ìë ìì ê³¼ ì°ê²°ë  ì ìë ì¡°ì¬
+	private static Map JOSA_TWO= new HashMap();
+	static {
+		JOSA_TWO.put("ê°", "Y");
+		JOSA_TWO.put("ë", "Y");
+		JOSA_TWO.put("ë¤", "Y");
+		JOSA_TWO.put("ë", "Y");
+		JOSA_TWO.put("ë", "Y");
+		JOSA_TWO.put("ê³ ", "Y");
+		JOSA_TWO.put("ë¼", "Y");
+		JOSA_TWO.put("ì", "Y");
+		JOSA_TWO.put("ë", "Y");
+		JOSA_TWO.put("ë¥¼", "Y");
+		JOSA_TWO.put("ë©°", "Y");
+		JOSA_TWO.put("ë ", "Y");
+		JOSA_TWO.put("ì¼", "Y");
+		JOSA_TWO.put("ì¬", "Y");
+	}
+	
+	// ì¢ì±ì´ ìë ìì ê³¼ ì°ê²°ë  ì ìë ì¡°ì¬
+	private static Map JOSA_THREE= new HashMap();
+	static {
+		JOSA_THREE.put("ê³¼", "Y");
+		JOSA_THREE.put("ì", "Y");
+		JOSA_THREE.put("ì", "Y");
+		JOSA_THREE.put("ì¼", "Y");
+		JOSA_THREE.put("ì", "Y");
+		JOSA_THREE.put("ì", "Y");
+	}
+	
+	/**
+	 * Tells whether the given word is registered in the {@code hahes} table.
+	 *
+	 * @param key word to look up
+	 * @return {@code true} if the table holds an entry for {@code key}
+	 */
+	public static boolean canHaheCompound(String key) {
+		return hahes.get(key)!=null;
+	}
+		
+	/**
+	 * Checks whether an ending (eomi) finishes with one of the consonants
+	 * registered in {@code eomiPnouns} (nieun, rieul or mieum).
+	 * <p>
+	 * The ending matches when it is itself one of those consonants, or when its
+	 * last character decomposes into three jamo and the third one is registered.
+	 *
+	 * @param eomi ending to test; may be {@code null}
+	 * @return {@code true} only on a match; {@code false} for a null or empty
+	 *         ending and for every ending that does not match
+	 */
+	public static boolean isNLM(String eomi) {
+
+		if(eomi==null || "".equals(eomi)) return false;
+
+		if(eomiPnouns.get(eomi)!=null) return true;
+
+		char[] chrs = MorphUtil.decompose(eomi.charAt(eomi.length()-1));
+		if(chrs.length==3  && eomiPnouns.get(Character.toString(chrs[2]))!=null) return true;
+
+		// No match. This used to fall through to "return true", which made the
+		// method accept every non-empty ending.
+		return false;
+
+	}
+	
+	/**
+	 * Tells whether the pattern code is one of those registered in
+	 * {@code PTN_MLIST} (PTN_NSM, PTN_NSMXM, PTN_NJCM, PTN_VM, PTN_VMCM,
+	 * PTN_VMXM, PTN_NVM).
+	 *
+	 * @param ptn pattern code from {@link PatternConstants}
+	 * @return {@code true} if the pattern is registered
+	 */
+	public static boolean isEomiPhrase(int ptn) {
+		return PTN_MLIST.get(ptn)!=null;
+	}
+	
+	/**
+	 * Tells whether the pattern code is one of those registered in
+	 * {@code PTN_JLIST} (PTN_NJ, PTN_NSMJ, PTN_VMJ, PTN_VMXMJ).
+	 *
+	 * @param ptn pattern code from {@link PatternConstants}
+	 * @return {@code true} if the pattern is registered
+	 */
+	public static boolean isJosaNounPhrase(int ptn) {
+		return PTN_JLIST.get(ptn)!=null;
+	}
+	
+	/**
+	 * Tells whether the pattern code equals {@code PatternConstants.PTN_ADVJ}.
+	 *
+	 * @param ptn pattern code from {@link PatternConstants}
+	 * @return {@code true} if {@code ptn} is PTN_ADVJ
+	 */
+	public static boolean isJosaAdvPhrase(int ptn) {
+		return ptn==PatternConstants.PTN_ADVJ;
+	}
+	
+	/**
+	 * Tells whether the pattern code is {@code PatternConstants.PTN_ADVJ} or
+	 * {@code PatternConstants.PTN_AID}.
+	 *
+	 * @param ptn pattern code from {@link PatternConstants}
+	 * @return {@code true} if {@code ptn} is one of the two codes
+	 */
+	public static boolean isAdvPhrase(int ptn) {
+		return ptn==PatternConstants.PTN_ADVJ || ptn==PatternConstants.PTN_AID;
+	}
+	
+	public static boolean isTwoJosa(String josa) {
+		
+		return (JOSA_TWO.get(josa)!=null);
+		
+	}
+	public static boolean isThreeJosa(String josa) {
+		
+		return (JOSA_THREE.get(josa)!=null);
+		
+	}	
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java Sun May  5 03:39:51 2013
@@ -0,0 +1,308 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.analysis.kr.morph.CompoundEntry;
+import org.apache.lucene.analysis.kr.morph.MorphException;
+import org.apache.lucene.analysis.kr.morph.WordEntry;
+
+public class DictionaryUtil {
+	
+	private static Trie<String,WordEntry> dictionary;
+	
+	private static HashMap josas;
+	
+	private static HashMap eomis;
+	
+	private static HashMap prefixs;
+	
+	private static HashMap suffixs;
+	
+	private static HashMap<String,WordEntry> uncompounds;
+	
+	private static HashMap<String, String> cjwords;
+	
+	/**
+	 * Loads the word dictionary.
+	 * <p>
+	 * Reads, as UTF-8, the files configured under {@code KoreanEnv.FILE_DICTIONARY},
+	 * {@code KoreanEnv.FILE_EXTENSION} and {@code KoreanEnv.FILE_COMPOUNDS} and builds
+	 * the lookup trie from them. The trie is published in {@code dictionary} only
+	 * after every entry has been added.
+	 *
+	 * @throws MorphException if a dictionary file cannot be read
+	 */
+	public synchronized static void loadDictionary() throws MorphException {
+
+		List<String> strList = null;
+		List<String> compounds = null;
+		try {
+			strList = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_DICTIONARY),"UTF-8");
+			strList.addAll(FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_EXTENSION),"UTF-8"));
+			compounds = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_COMPOUNDS),"UTF-8");
+		} catch (Exception e) {
+			// The exception used to be constructed but never thrown, so a read
+			// failure surfaced later as a NullPointerException on "compounds".
+			throw new MorphException(e.getMessage(),e);
+		}
+		if(strList==null) throw new MorphException("dictionary is null");
+		if(compounds==null) throw new MorphException("compound dictionary is null");
+
+		// Build into a local trie so that callers testing "dictionary==null" never
+		// observe a half-filled dictionary and a failed load is retried later.
+		Trie<String, WordEntry> loaded = new Trie<String, WordEntry>(true);
+
+		// Word lines: "<word>,<features>".
+		for(String str:strList) {
+			String[] infos = StringUtil.split(str,",");
+			if(infos.length!=2) continue;
+			infos[1] = infos[1].trim();
+			// Six-character feature strings get "000" inserted after the fifth character.
+			if(infos[1].length()==6) infos[1] = infos[1].substring(0,5)+"000"+infos[1].substring(5);
+
+			WordEntry entry = new WordEntry(infos[0].trim(),infos[1].trim().toCharArray());
+			loaded.add(entry.getWord(), entry);
+		}
+
+		// Compound lines: "<word>:<part>,<part>,...".
+		for(String compound: compounds) {
+			String[] infos = StringUtil.split(compound,":");
+			if(infos.length!=2) continue;
+			WordEntry entry = new WordEntry(infos[0].trim(),"20000000X".toCharArray());
+			entry.setCompounds(compoundArrayToList(infos[1], StringUtil.split(infos[1],",")));
+			loaded.add(entry.getWord(), entry);
+		}
+
+		dictionary = loaded;
+	}
+	
+	/**
+	 * Returns the dictionary entries whose key starts with the given prefix,
+	 * loading the dictionary first when it has not been loaded yet.
+	 *
+	 * @param prefix key prefix to look up
+	 * @return iterator over the matching entries
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static Iterator findWithPrefix(String prefix) throws MorphException {
+		if(dictionary==null) {
+			loadDictionary();
+		}
+		return dictionary.getPrefixedBy(prefix);
+	}
+
+	/**
+	 * Looks up a word in the dictionary, loading the dictionary on first use.
+	 *
+	 * @param key word to look up; must not be {@code null}
+	 * @return the entry stored under {@code key}, or {@code null} for an empty key
+	 *         or when the dictionary lookup returns nothing
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getWord(String key) throws MorphException {
+		// The lazy load happens before the empty-key check.
+		if(dictionary==null) {
+			loadDictionary();
+		}
+
+		return key.length()==0 ? null : (WordEntry)dictionary.get(key);
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_NOUN or IDX_BUSA feature is '1'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or neither feature is set
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getWordExceptVerb(String key) throws MorphException {
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return (entry.getFeature(WordEntry.IDX_NOUN)=='1'
+				|| entry.getFeature(WordEntry.IDX_BUSA)=='1') ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_NOUN feature is '1'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or the feature is not '1'
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getNoun(String key) throws MorphException {
+
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return entry.getFeature(WordEntry.IDX_NOUN)=='1' ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_NOUN feature is '1' or '2'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or the feature has another value
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getCNoun(String key) throws MorphException {
+
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return (entry.getFeature(WordEntry.IDX_NOUN)=='1'
+				|| entry.getFeature(WordEntry.IDX_NOUN)=='2') ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_VERB feature is '1'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or the feature is not '1'
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getVerb(String key) throws MorphException {
+
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return entry.getFeature(WordEntry.IDX_VERB)=='1' ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_BUSA feature is '1'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or the feature is not '1'
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getAdverb(String key) throws MorphException {
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return entry.getFeature(WordEntry.IDX_BUSA)=='1' ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_BUSA feature is '1' and its
+	 * IDX_NOUN feature is '0'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or the features differ
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getBusa(String key) throws MorphException {
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return (entry.getFeature(WordEntry.IDX_BUSA)=='1'
+				&& entry.getFeature(WordEntry.IDX_NOUN)=='0') ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_VERB feature is '1' and its
+	 * IDX_REGURA feature equals the requested irregular type.
+	 *
+	 * @param key word to look up
+	 * @param irrType feature value the IDX_REGURA slot must hold
+	 * @return the matching entry, or {@code null} if absent or the features differ
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getIrrVerb(String key, char irrType) throws MorphException {
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return (entry.getFeature(WordEntry.IDX_VERB)=='1'
+				&& entry.getFeature(WordEntry.IDX_REGURA)==irrType) ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_BEV feature is '1'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or the feature is not '1'
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getBeVerb(String key) throws MorphException {
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return entry.getFeature(WordEntry.IDX_BEV)=='1' ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word and returns it only if its IDX_DOV feature is '1'.
+	 *
+	 * @param key word to look up
+	 * @return the matching entry, or {@code null} if absent or the feature is not '1'
+	 * @throws MorphException if the dictionary cannot be loaded
+	 */
+	public static WordEntry getDoVerb(String key) throws MorphException {
+		WordEntry entry = getWord(key);
+		if(entry==null) {
+			return null;
+		}
+
+		return entry.getFeature(WordEntry.IDX_DOV)=='1' ? entry : null;
+	}
+	
+	/**
+	 * Looks up a word in the table read from {@code KoreanEnv.FILE_UNCOMPOUNDS},
+	 * loading that file on first use. Usable lines have the form
+	 * "&lt;word&gt;:&lt;part&gt;,&lt;part&gt;,...".
+	 *
+	 * @param key word to look up
+	 * @return the entry stored under {@code key}, or {@code null} if there is none
+	 * @throws MorphException if the file cannot be read
+	 */
+	public static WordEntry getUncompound(String key) throws MorphException {
+
+		if(uncompounds==null) {
+			// Fill a local map and publish it only after a complete load. Otherwise
+			// a failed read would leave an empty map cached and every later call
+			// would silently return null instead of retrying.
+			HashMap<String,WordEntry> loaded = new HashMap<String,WordEntry>();
+			try {
+				List<String> lines = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_UNCOMPOUNDS),"UTF-8");
+				for(String compound: lines) {
+					String[] infos = StringUtil.split(compound,":");
+					if(infos.length!=2) continue;
+					WordEntry entry = new WordEntry(infos[0].trim(),"90000X".toCharArray());
+					entry.setCompounds(compoundArrayToList(infos[1], StringUtil.split(infos[1],",")));
+					loaded.put(entry.getWord(), entry);
+				}
+			}catch(Exception e) {
+				throw new MorphException(e);
+			}
+			uncompounds = loaded;
+		}
+		return uncompounds.get(key);
+	}
+	
+	public static String getCJWord(String key) throws MorphException {
+		
+		try {
+			if(cjwords==null) {
+				cjwords = new HashMap();
+				List<String> lines = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_CJ),"UTF-8");	
+				for(String cj: lines) {		
+					String[] infos = StringUtil.split(cj,":");
+					if(infos.length!=2) continue;
+					cjwords.put(infos[0], infos[1]);
+				}			
+			}	
+		}catch(Exception e) {
+			throw new MorphException(e);
+		}
+		return cjwords.get(key);
+		
+	}
+	
+	public static boolean existJosa(String str) throws MorphException {
+		if(josas==null) {
+			josas = new HashMap();
+			readFile(josas,KoreanEnv.FILE_JOSA);
+		}	
+		if(josas.get(str)==null) return false;
+		else return true;
+	}
+	
+	public static boolean existEomi(String str)  throws MorphException {
+		if(eomis==null) {
+			eomis = new HashMap();
+			readFile(eomis,KoreanEnv.FILE_EOMI);
+		}
+
+		if(eomis.get(str)==null) return false;
+		else return true;
+	}
+	
+	public static boolean existPrefix(String str)  throws MorphException {
+		if(prefixs==null) {
+			prefixs = new HashMap();
+			readFile(prefixs,KoreanEnv.FILE_PREFIX);
+		}
+
+		if(prefixs.get(str)==null) return false;
+		else return true;
+	}
+	
+	public static boolean existSuffix(String str)  throws MorphException {
+		if(suffixs==null) {
+			suffixs = new HashMap();
+			readFile(suffixs,KoreanEnv.FILE_SUFFIX);
+		}
+
+		if(suffixs.get(str)!=null) return true;
+		
+		return false;
+	}
+	
+	/**
+	 * ã´,ã¹,ã,ãê³¼ eomi ê° ê²°í©íì¬ ì´ë¯¸ê° ë  ì ìëì§ ì ê²íë¤.
+	 * @param s
+	 * @param end
+	 * @return
+	 */
+	public static String combineAndEomiCheck(char s, String eomi) throws MorphException {
+	
+		if(eomi==null) eomi="";
+
+		if(s=='ã´') eomi = "ì"+eomi;
+		else if(s=='ã¹') eomi = "ì"+eomi;
+		else if(s=='ã') eomi = "ì"+eomi;
+		else if(s=='ã') eomi = "ìµ"+eomi;
+		else eomi = s+eomi;
+
+		if(existEomi(eomi)) return eomi;		
+
+		return null;
+		
+	}
+	
+	/**
+	 * Reads a line-oriented dictionary file into the given map. Each loaded line
+	 * is stored with its trimmed text as key and the untrimmed line as value.
+	 *
+	 * @param map map that receives the entries
+	 * @param dic {@link KoreanEnv} property name that resolves to the file path
+	 * @throws MorphException if the file cannot be read
+	 */
+	private static synchronized void readFile(HashMap map, String dic) throws MorphException {
+
+		String path = KoreanEnv.getInstance().getValue(dic);
+
+		try {
+			List<String> lines = FileUtil.readLines(path,"UTF-8");
+			// Starts at index 1: the first line of the file is not loaded
+			// (presumably a header line -- TODO confirm against the dictionary files).
+			int idx = 1;
+			while(idx<lines.size()) {
+				String raw = lines.get(idx);
+				map.put(raw.trim(), raw);
+				idx++;
+			}
+		} catch (Exception e) {
+			// Covers IOException as well; every failure is wrapped the same way.
+			throw new MorphException(e.getMessage(),e);
+		}
+	}
+	
+	/**
+	 * Wraps each component string in a {@link CompoundEntry} whose offset is the
+	 * index of the component's first occurrence in {@code source}.
+	 *
+	 * @param source string the components were split from
+	 * @param arr component strings
+	 * @return list of {@link CompoundEntry} objects in the order of {@code arr}
+	 */
+	private static List compoundArrayToList(String source, String[] arr) {
+		List entries = new ArrayList();
+		for(int i=0;i<arr.length;i++) {
+			String part = arr[i];
+			CompoundEntry ce = new CompoundEntry(part);
+			int offset = source.indexOf(part);
+			ce.setOffset(offset);
+			entries.add(ce);
+		}
+		return entries;
+	}
+}
+

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java Sun May  5 03:39:51 2013
@@ -0,0 +1,665 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.kr.morph.AnalysisOutput;
+import org.apache.lucene.analysis.kr.morph.MorphException;
+import org.apache.lucene.analysis.kr.morph.PatternConstants;
+
+public class EomiUtil {
+
+	
+	public static final String RESULT_FAIL = "0";
+	
+	public static final String RESULT_SUCCESS = "1";
+	
+	public static final String[] verbSuffix = {
+		  "ì´","í","ë","ì¤ë½","ì¤ë¬ì°","ìí¤","ì","ì","ê°","ë¹í","ë§í","ëë¦¬","ë°","ë","ë´"
+	};
+	
+	/**
+	 * ê°ì¥ ê¸¸ì´ê° ê¸´ ì´ë¯¸ë¥¼ ë¶ë¦¬íë¤.
+	 * @param term
+	 * @return
+	 * @throws MorphException
+	 */
+	public static String[] longestEomi(String term) throws MorphException  {
+		
+		String[] result = new String[2];
+		result[0] = term;
+		
+		String stem;
+		String eomi;
+		char[] efeature;
+		
+		for(int i=term.length();i>0;i--) {
+			
+			stem = term.substring(0,i);			
+		
+			if(i!=term.length()) {
+				eomi = term.substring(i);
+				efeature  = SyllableUtil.getFeature(eomi.charAt(0));				
+			} else {
+				efeature = SyllableUtil.getFeature(stem.charAt(i-1));
+				eomi="";
+			}
+
+			if(SyllableUtil.isAlpanumeric(stem.charAt(i-1))) break;
+			
+			char[] jasos = MorphUtil.decompose(stem.charAt(i-1));
+	
+			if(!"".equals(eomi)&&!DictionaryUtil.existEomi(eomi)) {
+				// do not anything.
+			} else if(jasos.length>2&&
+					(jasos[2]=='ã´'||jasos[2]=='ã¹'||jasos[2]=='ã'||jasos[2]=='ã')&&
+					DictionaryUtil.combineAndEomiCheck(jasos[2], eomi)!=null) {
+				result[0] = Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 0));
+				if(i!=0) result[0] = stem.substring(0,i-1)+result[0];
+				result[1] = Character.toString(jasos[2]);
+			}else if(i>0&&(stem.endsWith("í")&&"ì¬".equals(eomi))||
+					(stem.endsWith("ê°")&&"ê±°ë¼".equals(eomi))||
+					(stem.endsWith("ì¤")&&"ëë¼".equals(eomi))) {
+				result[0] = stem;
+				result[1] = eomi;			
+			}else if(jasos.length==2&&(!stem.endsWith("ì")&&!stem.endsWith("ì´"))&&
+					(jasos[1]=='ã'||jasos[1]=='ã'||jasos[1]=='ã'||jasos[1]=='ã')&&
+					(DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {		
+				char[] chs = MorphUtil.decompose(stem.charAt(stem.length()-1));				
+				result[0] = stem;
+				result[1] = "ì´"+eomi;
+			}else if((jasos[1]=='ã'||jasos[1]=='ã'||jasos[1]=='ã'||jasos[1]=='ã'||jasos[1]=='ã')&&
+					(DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {				
+				String end = "";				
+				if(jasos[1]=='ã')
+					end=MorphUtil.makeChar(stem.charAt(i-1), 8, 0)+"ì";	
+				else if(jasos[1]=='ã')
+					end=MorphUtil.makeChar(stem.charAt(i-1), 13, 0)+"ì´";	
+				else if(jasos[1]=='ã')
+					end=Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 6, 0));
+				else if(jasos[1]=='ã')
+					end=MorphUtil.makeChar(stem.charAt(i-1), 0, 0)+"ì´";	
+				else if(jasos[1]=='ã')
+					end=MorphUtil.makeChar(stem.charAt(i-1), 20, 0)+"ì ";										
+				
+				if(jasos.length==3) {					
+					end = end.substring(0,end.length()-1)+MorphUtil.replaceJongsung(end.charAt(end.length()-1),stem.charAt(i-1));
+				}
+				
+				if(stem.length()<2) result[0] = end;
+				else result[0] = stem.substring(0,stem.length()-1)+end;
+				result[1] = eomi;	
+				
+			}else if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI1]!='0'&&
+				DictionaryUtil.existEomi(eomi)) {
+				if(!(((jasos.length==2&&jasos[0]=='ã¹')||(jasos.length==3&&jasos[2]=='ã¹'))&&eomi.equals("ë¬"))) { // ã¹ ë¶ê·ì¹ì ìì¸
+					result[0] = stem;
+					result[1] = eomi;
+				}
+			}
+
+			if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI2]=='0') break;
+		}	
+
+		return result;
+		
+	}	
+	
+	/**
+	 * ì ì´ë§ì´ë¯¸ë¥¼ ë¶ìíë¤.
+	 * @param stem
+	 * @return
+	 */
+	public static String[] splitPomi(String stem) throws MorphException  {
+
+		//	 results[0]:ì±ê³µ(1)/ì¤í¨(0), results[1]: ì´ê·¼, results[2]: ì ì´ë§ì´ë¯¸
+		String[] results = new String[2];  
+		results[0] = stem;
+
+		if(stem==null||stem.length()==0||"ì".equals(stem)) return results;
+	
+		char[] chrs = stem.toCharArray();
+		int len = chrs.length;
+		String pomi = "";
+		int index = len-1;
+	
+		char[] jaso = MorphUtil.decompose(chrs[index]);
+		if(chrs[index]!='ì'&&chrs[index]!='ã'&&jaso[jaso.length-1]!='ã') return results;  // ì ì´ë§ì´ë¯¸ê° ë°ê²¬ëì§ ììë¤
+		
+		if(chrs[index]=='ê² ') {
+			pomi = "ê² ";
+			setPomiResult(results,stem.substring(0,index),pomi);		
+			if(--index<=0||
+					(chrs[index]!='ì'&&chrs[index]!='ã'&&jaso[jaso.length-1]!='ã')) 
+				return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
+			jaso = MorphUtil.decompose(chrs[index]);
+		}
+
+		if(chrs[index]=='ì') { // ìì, ãì, ì
+			pomi = chrs[index]+pomi;	
+			setPomiResult(results,stem.substring(0,index),pomi);		
+			if(--index<=0||
+					(chrs[index]!='ì'&&chrs[index]!='ã'&&jaso[jaso.length-1]!='ã')) 
+				return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...				
+			jaso = MorphUtil.decompose(chrs[index]);
+		}
+
+		if(chrs[index]=='ì'){
+			pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;	
+			if(index>0&&chrs[index-1]=='í') 
+				stem = stem.substring(0,index);	
+			else
+				 stem = stem.substring(0,index)+"ì´";
+			setPomiResult(results,stem,pomi);	
+		}else if(chrs[index]=='ì¨'){
+				pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;	
+				stem = stem.substring(0,index);		
+				setPomiResult(results,stem,"ì"+pomi);				
+		}else if(chrs[index]=='ì'||chrs[index]=='ì') {
+			pomi = chrs[index]+pomi;	
+			setPomiResult(results,stem.substring(0,index),pomi);		
+			if(--index<=0||
+					(chrs[index]!='ì'&&chrs[index]!='ì¼')) return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...				
+			jaso = MorphUtil.decompose(chrs[index]);		
+		}else if(jaso.length==3&&jaso[2]=='ã') {
+		
+			 if(jaso[0]=='ã'&&jaso[1]=='ã') {			 
+				pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;	
+				stem = stem.substring(0,index)+"í";	
+			}else if(jaso[0]!='ã'&&(jaso[1]=='ã'||jaso[1]=='ã'||jaso[1]=='ã'||jaso[1]=='ã')) {		
+				pomi = "ì"+pomi;
+				stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index], 0);				
+			}else if(jaso[0]!='ã'&&(jaso[1]=='ã')) {
+				pomi = "ì"+pomi;
+				stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],11, 0);				
+			} else if(jaso[1]=='ã') {			
+				pomi = MorphUtil.replaceJongsung('ì',chrs[index])+pomi;	
+				stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],8, 0);
+			} else if(jaso[1]=='ã') {
+				pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;	
+				stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],13, 0);
+			} else if(jaso[1]=='ã') {					
+				pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;				
+				stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],20, 0);					
+			} else if(jaso[1]=='ã') {
+				pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
+				stem = stem.substring(0,index);
+			} else if(jaso[1]=='ã') {
+				pomi = MorphUtil.replaceJongsung('ì ',chrs[index])+pomi;	
+				stem = stem.substring(0,index);
+			} else {
+				pomi = "ì"+pomi;
+			}
+			setPomiResult(results,stem,pomi);				
+			if(chrs[index]!='ì'&&chrs[index]!='ì¼') return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...				
+			jaso = MorphUtil.decompose(chrs[index]);				
+		}
+
+		char[] nChrs = null;
+		if(index>0) nChrs = MorphUtil.decompose(chrs[index-1]);
+		else nChrs = new char[2];
+
+		if(nChrs.length==2&&chrs[index]=='ì'&&(chrs.length<=index+1||
+				(chrs.length>index+1&&chrs[index+1]!='ì¨'))) {
+			if(DictionaryUtil.getWord(results[0])!=null) return results;  //'ì'ê° í¬í¨ë ë¨ì´ê° ìë¤. ì±ê°ìë¤/ëìë¤/ë¤ì¤ìë¤ 
+			pomi = chrs[index]+pomi;	
+			setPomiResult(results,stem.substring(0,index),pomi);			
+			if(--index==0||chrs[index]!='ì¼') return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...				
+			jaso = MorphUtil.decompose(chrs[index]);
+		}
+		
+		if(index>0) nChrs = MorphUtil.decompose(chrs[index-1]);
+		else nChrs = new char[2];
+		if(chrs.length>index+1&&nChrs.length==3&&(chrs[index+1]=='ì¨'||chrs[index+1]=='ì')&&chrs[index]=='ì¼') {
+			pomi = chrs[index]+pomi;	
+			setPomiResult(results,stem.substring(0,index),pomi);		
+		}
+	
+		return results;
+	}
+	
+	/**
+	 * ë¶ê·ì¹ ì©ì¸ì ìíì êµ¬íë¤.
+	 * @param output
+	 * @return
+	 * @throws MorphException
+	 */
+	public static List irregular(AnalysisOutput output) throws MorphException {
+		
+		List results = new ArrayList();
+	
+		if(output.getStem()==null||output.getStem().length()==0) 
+			return results;		
+		
+		String ending = output.getEomi();
+		if(output.getPomi()!=null) ending = output.getPomi();
+		
+		List<String[]> irrs = new ArrayList();
+		
+		irregularStem(irrs,output.getStem(),ending);
+		irregularEnding(irrs,output.getStem(),ending);
+		irregularAO(irrs,output.getStem(),ending);
+
+		try {
+			for(String[] irr: irrs) {
+				AnalysisOutput result = output.clone();
+				result.setStem(irr[0]);
+				if(output.getPatn()==PatternConstants.PTN_VM) {
+					if(output.getPomi()==null) result.setEomi(irr[1]);
+					else result.setPomi(irr[1]);
+				}	
+				results.add(result);
+			}				
+		} catch (CloneNotSupportedException e) {
+			throw new MorphException(e.getMessage(),e);
+		}
+				
+		return results;
+		
+	}
+	
+	/**
+	 * ì´ê°ë§ ë³íë ê²½ì°
+	 * @param results
+	 * @param stem
+	 * @param ending
+	 */
+	private static void irregularStem(List results, String stem, String ending) {	
+
+		char feCh = ending.charAt(0);
+		char[] fechJaso =  MorphUtil.decompose(feCh);
+		char ls = stem.charAt(stem.length()-1);
+		char[] lsJaso = MorphUtil.decompose(ls);
+	
+		if(feCh=='ì'||feCh=='ì´'||feCh=='ì¼') {
+			if(lsJaso[lsJaso.length-1]=='ã¹') { // ã· ë¶ê·ì¹
+				results.add(
+						new String[]{stem.substring(0,stem.length()-1)+
+								MorphUtil.makeChar(stem.charAt(stem.length()-1),7)
+								,ending
+								,String.valueOf(PatternConstants.IRR_TYPE_DI)});
+			} else if(lsJaso.length==2) { // ã ë¶ê·ì¹
+				results.add(
+						new String[]{stem.substring(0,stem.length()-1)+
+								MorphUtil.makeChar(stem.charAt(stem.length()-1),19)
+								,ending
+								,String.valueOf(PatternConstants.IRR_TYPE_SI)});				
+			}			
+		}
+		
+		if((fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã'||	feCh=='ì¤'||feCh=='ì')
+				&&(ls=='ì°')) { // ã ë¶ê·ì¹
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+							MorphUtil.makeChar(stem.charAt(stem.length()-1),17)
+							,ending
+							,String.valueOf(PatternConstants.IRR_TYPE_BI)});				
+		}
+		
+		if((fechJaso[0]=='ã´'||fechJaso[0]=='ã'||fechJaso[0]=='ã'||	feCh=='ì¤')
+				&&(lsJaso.length==2)) { // ã¹ íë½
+
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+							MorphUtil.makeChar(stem.charAt(stem.length()-1),8)
+							,ending
+							,String.valueOf(PatternConstants.IRR_TYPE_LI)});			
+		}
+		
+		if(lsJaso.length==2
+				&&(fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã'||fechJaso[0]=='ã'||
+					lsJaso[1]=='ã'||lsJaso[1]=='ã'||lsJaso[1]=='ã'||lsJaso[1]=='ã')
+					&&!"ë".equals(stem)) { // ã ë¶ê·ì¹, ê·¸ë¬ë [ë³ë¤]ë ã ë¶ê·ì¹ì´ ìëë¤.
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+							MorphUtil.makeChar(stem.charAt(stem.length()-1),27)
+							,ending
+							,String.valueOf(PatternConstants.IRR_TYPE_HI)});			
+		}		
+	}
+	
+	/**
+	 * ì´ë¯¸ë§ ë³íë ê²½ì°
+	 * @param results
+	 * @param stem
+	 * @param ending
+	 */
+	private static void irregularEnding(List results, String stem, String ending) {
+		if(ending.startsWith("ã")) return;
+		
+		char feCh = ending.charAt(0);
+		char ls = stem.charAt(stem.length()-1);
+
+		if(feCh=='ë¬'&&ls=='ë¥´') { // 'ë¬' ë¶ê·ì¹
+			results.add(
+					new String[]{stem
+							,"ì´"+ending.substring(1)
+							,String.valueOf(PatternConstants.IRR_TYPE_RO)});				
+		} else if("ë¼".equals(ending)&&"ê°ê±°".equals(stem)) { // 'ê±°ë¼' ë¶ê·ì¹
+			results.add( 
+					new String[]{stem.substring(0,stem.length()-1)
+							,"ì´ë¼"
+							,String.valueOf(PatternConstants.IRR_TYPE_GU)});							
+		} else if("ë¼".equals(ending)&&"ì¤ë".equals(stem)) { // 'ëë¼' ë¶ê·ì¹
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)
+							,"ì´ë¼"
+							,String.valueOf(PatternConstants.IRR_TYPE_NU)});			
+		}
+		
+		if("ì¬".equals(ending)&&ls=='í') { // 'ì¬' ë¶ê·ì¹
+			results.add(
+					new String[]{stem
+							,"ì´"
+							,String.valueOf(PatternConstants.IRR_TYPE_NU)});				
+		}
+	}
+	
+	/**
+	 * ì´ê°ê³¼ ì´ë¯¸ê° ëª¨ë ë³íë ê²½ì°
+	 * @param results
+	 * @param stem
+	 * @param ending
+	 */
+	private static void irregularAO(List results, String stem, String ending) {
+		
+		char ls = stem.charAt(stem.length()-1);
+		char[] lsJaso = MorphUtil.decompose(ls);
+		
+		if(lsJaso.length<2) return;
+		
+		if(lsJaso[1]=='ã') {
+			if(stem.endsWith("ëì")||stem.endsWith("ê³ ì")) { // 'ê³±ë¤', 'ëë¤'ì 'ã' ë¶ê·ì¹
+				results.add(
+						new String[]{stem.substring(0,stem.length()-2)+
+								MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã'
+								,makeTesnseEomi("ì",ending)
+								,String.valueOf(PatternConstants.IRR_TYPE_BI)});					
+			}else { // 'ì' ì¶ì½
+				results.add(
+						new String[]{stem.substring(0,stem.length()-1)+
+								MorphUtil.makeChar(stem.charAt(stem.length()-1),8,0) // ìì + ã 
+								,makeTesnseEomi("ì",ending)
+								,String.valueOf(PatternConstants.IRR_TYPE_WA)});				
+			}
+		} else if(stem.endsWith("í¼")) {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+							MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0) // ìì + - 
+							,makeTesnseEomi("ì´",ending)
+							,String.valueOf(PatternConstants.IRR_TYPE_WA)});	
+		} else if(lsJaso[1]=='ã') {
+			if(stem.length()>=2) // 'ã' ë¶ê·ì¹
+				results.add(
+					new String[]{stem.substring(0,stem.length()-2)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã'
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_BI)});	
+
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+							MorphUtil.makeChar(stem.charAt(stem.length()-1),13,0) // ìì + ã 
+							,makeTesnseEomi("ì´",ending)
+							,String.valueOf(PatternConstants.IRR_TYPE_WA)});	
+		} else if(stem.length()>=2&&ls=='ë¼') {
+			char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
+			if(ns.length==3&&ns[2]=='ã¹') { // ë¥´ ë¶ê·ì¹
+				results.add(
+						new String[]{stem.substring(0,stem.length()-2)+
+							MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "ë¥´"
+						   ,makeTesnseEomi("ì",ending)
+						   ,String.valueOf(PatternConstants.IRR_TYPE_RO)});					
+			}			
+		} else if(stem.length()>=2&&ls=='ë¬') {
+			char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
+			if(stem.charAt(stem.length()-2)=='ë¥´') { // ë¬ ë¶ê·ì¹
+				results.add(
+						new String[]{stem.substring(0,stem.length()-1)
+						   ,makeTesnseEomi("ì´",ending)
+						   ,String.valueOf(PatternConstants.IRR_TYPE_LO)});	
+			} else if(ns.length==3&&ns[2]=='ã¹') { // ë¥´ ë¶ê·ì¹
+				results.add(
+						new String[]{stem.substring(0,stem.length()-2)+
+							MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "ë¥´"
+						   ,makeTesnseEomi("ì´",ending)
+						   ,String.valueOf(PatternConstants.IRR_TYPE_RO)});	
+			}
+		} else if(stem.endsWith("í´")||stem.endsWith("ì¼")) {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_EI)});	
+		} else if(stem.endsWith("í´")) {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),0,0)
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_EI)});				
+		} else if(lsJaso.length==2&&lsJaso[1]=='ã') {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_UO)});	
+		} else if(lsJaso.length==2&&lsJaso[1]=='ã') {
+			// ì¼ íë½
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_UO)});	
+			//	 ì ë¶ê·ì¹
+			results.add(
+					new String[]{stem
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_AH)});	
+		} else if(lsJaso[1]=='ã') {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_EI)});	
+		} else if(lsJaso[1]=='ã') {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),11,0)
+					   ,makeTesnseEomi("ì´",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_OE)});	
+		} else if(lsJaso[1]=='ã') {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),0,27)
+					   ,makeTesnseEomi("ì",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
+		} else if(lsJaso[1]=='ã') {
+			results.add(
+					new String[]{stem.substring(0,stem.length()-1)+
+						MorphUtil.makeChar(stem.charAt(stem.length()-1),2,27)
+					   ,makeTesnseEomi("ì",ending)
+					   ,String.valueOf(PatternConstants.IRR_TYPE_HI)});							
+		}
+		
+	}
+	
+	/**
+	 * Builds a tense (prefinal) ending by joining a leading syllable with an ending.
+	 * <p>
+	 * If either argument is null or empty, the other argument is returned as-is.
+	 * Otherwise the first character of {@code endword} is compared against five
+	 * character literals in turn. On a match that first character is dropped from
+	 * {@code endword}, and the last character of {@code preword} is replaced by the
+	 * result of {@code MorphUtil.makeChar} called with the code 20, 4, 8, 16 or 17
+	 * respectively. When nothing matches, the two strings are concatenated unchanged.
+	 * <p>
+	 * NOTE(review): the character literals are mis-encoded in this copy of the file;
+	 * the numeric codes look like Hangul final-consonant indices, but that depends on
+	 * {@code MorphUtil.makeChar}, which is not visible here - confirm.
+	 *
+	 * @param preword  leading syllable; the original comment names two vowel syllables (presumably "a" or "eo")
+	 * @param endword  ending, which may itself start with a prefinal ending
+	 * @return the combined ending; {@code endword} when {@code preword} is null or empty,
+	 *         {@code preword} when {@code endword} is null or empty
+	 */
+	public static String makeTesnseEomi(String preword, String endword) {
+
+		if(preword==null||preword.length()==0) return endword; // nothing to prepend
+		if(endword==null||endword.length()==0) return preword; // nothing to append
+
+		if(endword.charAt(0)=='ã') { // first literal -> code 20
+			return preword.substring(0,preword.length()-1)+
+				MorphUtil.makeChar(preword.charAt(preword.length()-1),20)+endword.substring(1,endword.length());		
+		} else if(endword.charAt(0)=='ã´') { // second literal -> code 4
+			return preword.substring(0,preword.length()-1)+
+			MorphUtil.makeChar(preword.charAt(preword.length()-1),4)+endword.substring(1,endword.length());
+		} else if(endword.charAt(0)=='ã¹') { // third literal -> code 8
+			return preword.substring(0,preword.length()-1)+
+			MorphUtil.makeChar(preword.charAt(preword.length()-1),8)+endword.substring(1,endword.length());	
+		} else if(endword.charAt(0)=='ã') { // fourth literal -> code 16
+			return preword.substring(0,preword.length()-1)+
+			MorphUtil.makeChar(preword.charAt(preword.length()-1),16)+endword.substring(1,endword.length());					
+		} else if(endword.charAt(0)=='ã') { // fifth literal -> code 17
+			return preword.substring(0,preword.length()-1)+
+			MorphUtil.makeChar(preword.charAt(preword.length()-1),17)+endword.substring(1,endword.length());
+		}
+		return preword+endword;		// no leading character to merge: plain concatenation
+	}
+	
+	
+ 
+   /**
+    * Checks whether a stem ends in a nominalizing suffix or a postposition followed
+    * by a fixed one-syllable suffix (translated from the original comment:
+    * "eum/gi" + "i" + ending, or "eseo/buteo/eseobuteo" + "i" + ending).
+    * <p>
+    * The method returns {@code false} when {@code stem} has fewer than two characters
+    * or does not end with the suffix literal. Otherwise it returns {@code true} if the
+    * character before the last one decomposes (via {@code MorphUtil.decompose}) into
+    * three elements whose third equals the tested literal, or if any of three marker
+    * strings occurs somewhere in {@code stem}.
+    * <p>
+    * NOTE(review): the marker strings are located with {@code lastIndexOf}, so they
+    * match anywhere in the stem, not only directly before the final suffix - confirm
+    * this is intended. A null {@code stem} is not guarded against.
+    *
+    * @param stem the string to inspect
+    * @return {@code true} if one of the patterns described above is found
+    */
+   public static boolean endsWithEEomi(String stem) {
+	   int len = stem.length();
+	   if(len<2||!stem.endsWith("ì´")) return false; // too short, or required suffix missing
+	  
+	   char[] jasos = MorphUtil.decompose(stem.charAt(len-2)); // character just before the suffix
+	   if(jasos.length==3&&jasos[2]=='ã')
+		   return true;
+	   else {
+		   int index = stem.lastIndexOf("ê¸°"); // try each marker string until one is found
+		   if(index==-1) index = stem.lastIndexOf("ìì");
+		   if(index==-1) index = stem.lastIndexOf("ë¶í°");
+		   if(index==-1) return false;
+		   return true;
+	   }
+   }
+   
+	private static void setPomiResult(String[] results,String stem, String pomi ) {
+		results[0] = stem;
+		results[1] = pomi;
+	}	
+	
+	/**
+	 * Tests a feature flag of a syllable, selected by a second character.
+	 * <p>
+	 * The feature array for {@code ech} is fetched with {@code SyllableUtil.getFeature}.
+	 * {@code lch} is then matched against four character literals, and the method
+	 * reports whether the corresponding entry of the feature array equals {@code '1'}:
+	 * {@code IDX_YNPNA} or {@code IDX_YNPLN} for the first literal, {@code IDX_YNPLA}
+	 * for the second, {@code IDX_YNPMA} for the third and {@code IDX_YNPBA} for the
+	 * fourth. Any other {@code lch} yields {@code false}.
+	 * <p>
+	 * NOTE(review): the case-label literals are mis-encoded in this copy; judging by
+	 * the method name they are presumably the consonants n, l, m and b - confirm.
+	 *
+	 * @param ech the syllable whose features are looked up
+	 * @param lch the character selecting which feature flag to test
+	 * @return {@code true} if the selected feature flag is {@code '1'}; {@code false} otherwise
+	 * @throws MorphException declared by this method; presumably raised by the feature lookup - confirm
+	 */
+	public static boolean IsNLMBSyl(char ech, char lch) throws MorphException {
+	
+		char[] features = SyllableUtil.getFeature(ech);
+
+		switch(lch) {
+
+			case 'ã´' :
+				return (features[SyllableUtil.IDX_YNPNA]=='1' || features[SyllableUtil.IDX_YNPLN]=='1');				
+			case 'ã¹' :
+				return (features[SyllableUtil.IDX_YNPLA]=='1');
+			case 'ã' :
+				return (features[SyllableUtil.IDX_YNPMA]=='1');		
+			case 'ã' :
+				return (features[SyllableUtil.IDX_YNPBA]=='1');					
+		}
+	
+		return false; // lch is none of the four handled characters
+	}
+	
+	/**
+	 * Splits a word into a stem and an ending.
+	 * <p>
+	 * Cases listed by the original comment (translated):
+	 * <ol>
+	 * <li>regular predicates, and irregular predicates in which only the stem changes</li>
+	 * <li>words whose ending starts with one of the final consonants n / l / m / b</li>
+	 * <li>irregular words with "yeo / geora / neora"</li>
+	 * <li>words in which the ending "a / eo" has been dropped</li>
+	 * <li>separation of variant forms of "a / eo"</li>
+	 * </ol>
+	 * The branches below are tried in order and the first matching one fills the result.
+	 * Most branches accept a split only after a dictionary lookup succeeds
+	 * ({@code DictionaryUtil.combineAndEomiCheck} or {@code DictionaryUtil.existEomi}).
+	 * <p>
+	 * NOTE(review): {@code end} is dereferenced ({@code end.length()}) and concatenated
+	 * without a null check, and the character literals are mis-encoded in this copy, so
+	 * the exact letters each branch tests cannot be read from here.
+	 *
+	 * @param stem the candidate stem, possibly still carrying part of the ending in its last syllable
+	 * @param end  the ending text that follows {@code stem}
+	 * @return a two-element array holding the stem at index 0 and the ending at index 1;
+	 *         both elements are {@code null} when {@code stem} is empty, when its last
+	 *         character decomposes into a single element, or when no branch matches
+	 * @throws MorphException declared by this method; presumably propagated from the helper calls - confirm
+	 */
+	public static String[] splitEomi(String stem, String end) throws MorphException {
+
+		String[] strs = new String[2];
+		int strlen = stem.length();
+		if(strlen==0) return strs;
+
+		char estem = stem.charAt(strlen-1); // last character of the stem
+		char[] chrs = MorphUtil.decompose(estem);
+		if(chrs.length==1) return strs; // not a Hangul syllable...
+
+		if((chrs.length==3)&&(chrs[2]=='ã´'||chrs[2]=='ã¹'||chrs[2]=='ã'||chrs[2]=='ã')&&
+				EomiUtil.IsNLMBSyl(estem,chrs[2])&&
+				DictionaryUtil.combineAndEomiCheck(chrs[2], end)!=null) {		
+			strs[1] = Character.toString(chrs[2]); // the third decomposed element starts the ending
+			if(end.length()>0) strs[1] += end;
+			strs[0] = stem.substring(0,strlen-1) + MorphUtil.makeChar(estem, 0);	
+		} else if(estem=='í´'&&DictionaryUtil.existEomi("ì´"+end)) {			
+			strs[0] = stem.substring(0,strlen-1)+"í";
+			strs[1] = "ì´"+end;	
+		} else if(estem=='í'&&DictionaryUtil.existEomi("ì´"+end)) {			
+			strs[0] = stem.substring(0,strlen-1)+"í";
+			strs[1] = "ì´"+end;				
+		} else if(chrs[0]!='ã'&&
+				(chrs[1]=='ã'||chrs[1]=='ã'||chrs[1]=='ã'||chrs[1]=='ã')&&
+				(chrs.length==2 || SyllableUtil.getFeature(estem)[SyllableUtil.IDX_YNPAH]=='1')&&
+				(DictionaryUtil.combineAndEomiCheck('ì´', end)!=null)) {		
+		
+			strs[0] = stem; // stem is kept whole in this branch
+			if(chrs.length==2) strs[1] = "ì´"+end;	
+			else strs[1] = end;	
+		} else if(stem.endsWith("í")&&"ì¬".equals(end)) {			
+			strs[0] = stem;
+			strs[1] = "ì´";	
+		}else if((chrs.length==2)&&(chrs[1]=='ã'||chrs[1]=='ã'||chrs[1]=='ã'||chrs[1]=='ã'||chrs[1]=='ã'||chrs[1]=='ã')&&
+				(DictionaryUtil.combineAndEomiCheck('ì´', end)!=null)) {		
+	
+			StringBuffer sb = new StringBuffer();
+			
+			if(strlen>1) sb.append(stem.substring(0,strlen-1)); // everything before the last character
+			
+			if(chrs[1]=='ã')
+				sb.append(MorphUtil.makeChar(estem, 8, 0)).append(MorphUtil.replaceJongsung('ì',estem));	
+			else if(chrs[1]=='ã')
+				sb.append(MorphUtil.makeChar(estem, 13, 0)).append(MorphUtil.replaceJongsung('ì´',estem));	
+			else if(chrs[1]=='ã')
+				sb.append(MorphUtil.makeChar(estem, 11, 0)).append(MorphUtil.replaceJongsung('ì´',estem));				
+			else if(chrs[1]=='ã')
+				sb.append(Character.toString(MorphUtil.makeChar(estem, 20, 0))).append(MorphUtil.replaceJongsung('ì´',estem));
+			else if(chrs[1]=='ã')
+				sb.append(MorphUtil.makeChar(estem, 0, 0)).append(MorphUtil.replaceJongsung('ì´',estem));
+			else if(chrs[1]=='ã')
+				sb.append(MorphUtil.makeChar(estem, 20, 0)).append(MorphUtil.replaceJongsung('ì ',estem));	
+			
+		
+			strs[0] = sb.toString();
+		
+			end = strs[0].substring(strs[0].length()-1)+end;				// move the last rebuilt character onto the ending
+			strs[0] = strs[0].substring(0,strs[0].length()-1);
+			
+			strs[1] = end;		
+
+		}else if(!"".equals(end)&&DictionaryUtil.existEomi(end)) {		
+			strs = new String[]{stem, end}; // end is already a known ending: return the inputs unchanged
+		}
+
+		return strs;
+	}
+}