You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cm...@apache.org on 2013/05/05 05:40:06 UTC

svn commit: r1479234 [8/15] - in /lucene/dev/branches/lucene4956: dev-tools/idea/.idea/ dev-tools/idea/lucene/analysis/arirang/ lucene/analysis/ lucene/analysis/arirang/ lucene/analysis/arirang/src/ lucene/analysis/arirang/src/java/ lucene/analysis/ari...

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/SyllableUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/SyllableUtil.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/SyllableUtil.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/SyllableUtil.java Sun May  5 03:39:51 2013
@@ -0,0 +1,136 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.kr.morph.MorphException;
+
+/**
+ * Lookup of per-syllable feature flags for Hangul syllables, loaded lazily
+ * from the syllable feature file configured in {@link KoreanEnv}.
+ * <p>
+ * The IDX_* constants presumably index into the char[] returned by
+ * getFeature(); the counts quoted next to them come from the original
+ * comments and are not verified here.
+ */
+public class SyllableUtil {
+
+	public static final int IDX_JOSA1 = 0; // 48 syllables used as the first syllable of a particle (josa)
+	public static final int IDX_JOSA2 = 1; // 58 syllables used as the second or later syllable of a particle
+	public static final int IDX_EOMI1 = 2; // 72 syllables used as the first syllable of an ending (eomi)
+	public static final int IDX_EOMI2 = 3; // 105 syllables used as the second or later syllable of an ending
+	public static final int IDX_YONG1 = 4; // 362 syllables used in one-syllable predicates (yongeon)
+	public static final int IDX_YONG2 = 5; // 316 syllables used as the last syllable of two-syllable predicates
+	public static final int IDX_YONG3 = 6; // 195 syllables used as the last syllable of predicates of three or more syllables
+	public static final int IDX_CHEON1 = 7; // 680 syllables used in one-syllable substantives (cheeon)
+	public static final int IDX_CHEON2 = 8; // 916 syllables used as the last syllable of two-syllable substantives
+	public static final int IDX_CHEON3 = 9; // 800 syllables used as the last syllable of three-syllable substantives
+	public static final int IDX_CHEON4 = 10; // 610 syllables used as the last syllable of four-syllable substantives
+	public static final int IDX_CHEON5 = 11; // 330 syllables used as the last syllable of substantives of five or more syllables
+	public static final int IDX_BUSA1 = 12; // 191 syllables used as the last syllable of one-syllable adverbs (busa)
+	public static final int IDX_BUSA2 = 13; // 519 syllables used as the last syllable of two-syllable adverbs
+	public static final int IDX_BUSA3 = 14; // 139 syllables used as the last syllable of three-syllable adverbs
+	public static final int IDX_BUSA4 = 15; // 366 syllables used as the last syllable of four-syllable adverbs
+	public static final int IDX_BUSA5 = 16; // 79 syllables used as the last syllable of five-syllable adverbs
+	public static final int IDX_PRONOUN = 17; // 77 syllables used as the last syllable of pronouns
+	public static final int IDX_EXCLAM = 18; // 241 syllables used as the last syllable of determiners and interjections
+
+	public static final int IDX_YNPNA = 19; // 129 syllables produced by (predicate + '-ㄴ')
+	public static final int IDX_YNPLA = 20; // 129 syllables produced by (predicate + '-ㄹ')
+	public static final int IDX_YNPMA = 21; // 129 syllables produced by (predicate + '-ㅁ')
+	public static final int IDX_YNPBA = 22; // 129 syllables produced by (predicate + '-ㅂ')
+	public static final int IDX_YNPAH = 23; // syllables produced when one of the 129 vowel-final syllables ending in 'ㅏ/ㅓ/ㅐ/ㅔ/ㅕ' combines with the pre-final ending '-었-'
+	public static final int IDX_YNPOU = 24; // syllables produced when a syllable ending in the vowel 'ㅗ/ㅜ' combines with an ending starting with '아/어' or with the pre-final ending '-었-'
+	public static final int IDX_YNPEI = 25; // syllables produced when a predicate ending in the vowel 'ㅣ' combines with an ending starting with '아/어' or with the pre-final ending '-었-'
+	public static final int IDX_YNPOI = 26; // syllables produced when a predicate ending in the vowel 'ㅚ' combines with an ending starting with '아/어' or with the pre-final ending '-었-'
+	public static final int IDX_YNPLN = 27; // syllables produced when a predicate ending in the final consonant 'ㄹ' combines with the ending '-ㄴ'
+	public static final int IDX_IRRLO = 28; // syllables produced by the '러' irregular conjugation (8 predicates): 러, 렀
+	public static final int IDX_IRRPLE = 29; // syllables produced by the '르' irregular conjugation (193 predicates)
+	public static final int IDX_IRROO = 30; // syllables produced by the '우' irregular conjugation: 퍼, 펐
+	public static final int IDX_IRROU = 31; // syllables produced by the '어' irregular conjugation: 해, 했
+	public static final int IDX_IRRDA = 32; // syllables produced by the 'ㄷ' irregular conjugation (37 predicates)
+	public static final int IDX_IRRBA = 33; // syllables produced by the 'ㅂ' irregular conjugation (446 predicates)
+	public static final int IDX_IRRSA = 34; // syllables produced by the 'ㅅ' irregular conjugation (39 predicates)
+	public static final int IDX_IRRHA = 35; // syllables produced by the 'ㅎ' irregular conjugation (96 predicates)
+	public static final int IDX_PEND = 36; // pre-final endings: 시 셨 았 었 였 겠
+
+	public static final int IDX_YNPEOMI = 37; // 734 syllables produced when a predicate combines with an ending
+
+	/** Syllables used only in the surface form of predicates. */
+	public static final int IDX_WDSURF = 38;
+
+	public static final int IDX_EOGAN = 39; // syllables that can occur as an ending or a variant of an ending (i.e. IDX_EOMI is set, or a flag from IDX_YNPNA onward is 1)
+
+	/**
+	 * Syllable feature table, one char[] per line of the feature file (header
+	 * line excluded). Volatile and only ever assigned a fully populated list,
+	 * so readers never observe a partially loaded table.
+	 */
+	private static volatile List<char[]> Syllables;
+
+	/**
+	 * Returns the features of the syllable at the given index.
+	 * An index outside the table (e.g. one computed from a Latin letter or a
+	 * digit) yields the features of the last table entry; the original
+	 * comment identifies that entry as '힣'.
+	 *
+	 * @param idx offset of the syllable from '가' (0xAC00) in Unicode order
+	 * @return the feature array of the syllable
+	 * @throws MorphException if the feature file cannot be read or is empty
+	 */
+	public static char[] getFeature(int idx) throws MorphException {
+
+		List<char[]> features = Syllables;
+		if (features == null) features = getSyllableFeature();
+
+		if (idx < 0 || idx >= features.size())
+			return features.get(features.size() - 1);
+		else
+			return features.get(idx);
+
+	}
+
+	/**
+	 * Returns the features of a single syllable.
+	 *
+	 * @param syl the syllable
+	 * @return the feature array of the syllable
+	 * @throws MorphException if the feature file cannot be read or is empty
+	 */
+	public static char[] getFeature(char syl) throws MorphException {
+
+		int idx = syl - 0xAC00;
+		return getFeature(idx);
+
+	}
+
+	/**
+	 * Reads the syllable feature table from the feature file, skipping its
+	 * first line, and caches it. Synchronized so that concurrent first calls
+	 * load the file once; the table is built in a local list and published
+	 * only when complete.
+	 *
+	 * @return the loaded (or previously cached) feature table
+	 * @throws MorphException if the file cannot be read or holds no entries
+	 */
+	private static synchronized List<char[]> getSyllableFeature() throws MorphException {
+
+		if (Syllables != null) return Syllables;
+
+		List<char[]> features;
+		try {
+			List<String> line = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_SYLLABLE_FEATURE),"UTF-8");
+			features = new ArrayList<char[]>(line.size());
+			// Line 0 is skipped, as in the original loader.
+			for (int i = 1; i < line.size(); i++) {
+				features.add(line.get(i).toCharArray());
+			}
+		} catch (IOException e) {
+			// Keep the original message but attach the cause for diagnosis.
+			MorphException wrapped = new MorphException(e.getMessage());
+			wrapped.initCause(e);
+			throw wrapped;
+		}
+
+		// Fail here with a clear message instead of an index error in getFeature().
+		if (features.isEmpty())
+			throw new MorphException("Syllable feature file contains no entries");
+
+		Syllables = features;
+		return features;
+
+	}
+
+	/**
+	 * Tells whether the character is an ASCII digit or an ASCII letter.
+	 * The punctuation characters that lie between '9' and 'A' and between
+	 * 'Z' and 'a' are not alphanumeric and are rejected.
+	 *
+	 * @param ch the character to test
+	 * @return true for '0'-'9', 'A'-'Z' and 'a'-'z'
+	 */
+	public static boolean isAlpanumeric(char ch) {
+		return (ch >= '0' && ch <= '9')
+			|| (ch >= 'A' && ch <= 'Z')
+			|| (ch >= 'a' && ch <= 'z');
+	}
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Trie.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Trie.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Trie.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Trie.java Sun May  5 03:39:51 2013
@@ -0,0 +1,830 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.NoSuchElementException;
+
+/**
+ * An information reTRIEval tree, a.k.a., a prefix tree. A Trie is similar to a
+ * dictionary, except that keys must be strings. Furthermore, Trie provides an
+ * efficient means (getPrefixedBy()) to find all values given just a PREFIX of a
+ * key.
+ * <p>
+ * 
+ * All retrieval operations run in O(nm) time, where n is the size of the
+ * key/prefix and m is the size of the alphabet. Some implementations may reduce
+ * this to O(n log m) or even O(n) time. Insertion operations are assumed to be
+ * infrequent and may be slower. The space required is roughly linear with
+ * respect to the sum of the sizes of all keys in the tree, though this may be
+ * reduced if many keys have common prefixes.
+ * <p>
+ * 
+ * The Trie can be set to ignore case. Doing so is the same as making all keys
+ * and prefixes lower case. That means the original keys cannot be extracted
+ * from the Trie.
+ * <p>
+ * 
+ * Restrictions (not necessarily limitations!)
+ * <ul>
+ * <li><b>This class is not synchronized.</b> Do that externally if you
+ * desire.
+ * <li>Keys and values may not be null.
+ * <li>The interface to this is not complete.
+ * </ul>
+ * 
+ * See http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Tree/Trie.html for a
+ * discussion of Tries.
+ */
+public class Trie<S,V> {
+    /**
+     * Our representation consists of a tree of nodes whose edges are labelled
+     * by strings. The first characters of all labels of all edges of a node
+     * must be distinct. Typically the edges are sorted, but this is determined
+     * by TrieNode.
+     * <p>
+     *
+     * An abstract TrieNode is a mapping from String keys to values,
+     * { &lt;K1, V1&gt;, ..., &lt;KN, VN&gt; }, where all Ki and Kj are
+     * distinct for all i != j. For any node N, define KEY(N) to be the
+     * concatenation of all labels on the edges from the root to that node.
+     * Then the abstraction function is:
+     * <p>
+     *
+     * <blockquote> { &lt;KEY(N), N.getValue()&gt; | N is a child of root and
+     * N.getValue() != null} </blockquote>
+     *
+     * An earlier version used character labels on edges. This made
+     * implementation simpler but used more memory because one node would be
+     * allocated to each character in long strings if that string had no common
+     * prefixes with other elements of the Trie.
+     * <p>
+     *
+     * <dl>
+     * <dt>INVARIANT:</dt>
+     * <dd>For any node N, for any edges Ei and Ej from N,<br>
+     * i != j &lt;==&gt; Ei.getLabel().getCharAt(0) !=
+     * Ej.getLabel().getCharAt(0)</dd>
+     * <dd>Also, all invariants for TrieNode and TrieEdge must hold.</dd>
+     * </dl>
+     */
+    private TrieNode<V> root;
+
+    /**
+     * Indicates whether keys are matched without regard to case. If true,
+     * canonicalCase() converts keys and prefixes to lower case; if false,
+     * they are used unchanged.
+     */
+    private boolean ignoreCase;
+
+    /**
+     * The shared EmptyIterator instance returned when nothing matches.
+     */
+    private final static Iterator EMPTY_ITERATOR = new EmptyIterator();
+
+    /**
+     * Constructs a new, empty tree.
+     *
+     * @param ignoreCase if true, add, get, remove and getPrefixedBy(String)
+     *        convert their key or prefix to lower case first
+     */
+    public Trie(boolean ignoreCase) {
+        this.ignoreCase = ignoreCase;
+        // Create the root directly instead of calling the overridable
+        // clear(), so that a subclass override is never invoked on a
+        // partially constructed object.
+        this.root = new TrieNode<V>();
+    }
+
+    /**
+     * Makes this empty by replacing the root with a fresh node that has no
+     * value and no children; all previously stored keys become unreachable.
+     *
+     * @modifies this
+     */
+    public void clear() {
+        this.root = new TrieNode<V>();
+    }
+
+    /**
+     * Returns the canonical form of the given string, i.e. the form under
+     * which keys are stored and looked up.
+     * <p>
+     *
+     * When this Trie is case-sensitive the string is returned unchanged. When
+     * it ignores case the string is converted to upper case and then to lower
+     * case, both with Locale.US.
+     * <p>
+     *
+     * Overrides may additionally normalize to NFC (interoperable across
+     * platforms), or to NFKC after stripping accents and diacritics from the
+     * NFKD form (suited to natural-language searches).
+     * <p>
+     *
+     * This method is public rather than protected because callers of the
+     * prefix operations may need to canonicalize their search prefixes the
+     * same way.
+     *
+     * @param s the string to canonicalize
+     * @return the canonical form of s
+     */
+    public String canonicalCase(final String s) {
+        return ignoreCase
+                ? s.toUpperCase(Locale.US).toLowerCase(Locale.US)
+                : s;
+    }
+
+    /**
+     * Matches the pattern <tt>b</tt> against the text
+     * <tt>a[startOffset...stopOffset - 1]</tt>.
+     *
+     * @return the first <tt>j</tt> so that:<br>
+     *         <tt>0 &lt;= j &lt; b.length()</tt> AND<br>
+     *         <tt>a[startOffset + j] != b[j]</tt> [a and b differ]<br>
+     *         OR <tt>stopOffset == startOffset + j</tt> [a is exhausted];<br>
+     *         Returns -1 if no such <tt>j</tt> exists, i.e., all of
+     *         <tt>b</tt> matches.<br>
+     *         Examples:
+     *         <ol>
+     *         <li>a = "abcde", startOffset = 0, stopOffset = 5, b = "abc"
+     *         ==&gt; returns -1 (all of b matches)
+     *         <li>a = "abcde", startOffset = 1, stopOffset = 5, b = "bXd"
+     *         ==&gt; returns 1 ('c' differs from 'X')
+     *         <li>a = "abcde", startOffset = 1, stopOffset = 3, b = "bcd"
+     *         ==&gt; returns 2 (the text window ends before 'd')
+     *         </ol>
+     *
+     * @requires 0 &lt;= startOffset &lt;= stopOffset &lt;= a.length()
+     */
+    private final int match(String a, int startOffset, int stopOffset, String b) {
+        final int patternLength = b.length();
+        int pos = 0; // index into b; the parallel index into a is startOffset + pos
+        while (pos < patternLength) {
+            int textIndex = startOffset + pos;
+            // Stop at the end of the text window or at the first difference.
+            if (textIndex >= stopOffset || a.charAt(textIndex) != b.charAt(pos))
+                return pos;
+            pos++;
+        }
+        return -1;
+    }
+
+
+    /**
+     * Maps the given key (which may be empty) to the given value.
+     * <p>
+     *
+     * Note that the previous value is only reported when the key's node
+     * already exists; when a new node is created the result is null.
+     *
+     * @return the old value associated with key, or <tt>null</tt> if none
+     * @requires value != null
+     * @modifies this
+     */
+    public V add(String key, V value) {
+        // Canonicalize once, up front.
+        final String canonical = canonicalCase(key);
+        final int keyLength = canonical.length();
+        // Walk down the longest prefix canonical[0..i - 1] already present.
+        TrieNode<V> node = root;
+        int i = 0;
+        while (i < keyLength) {
+            // Edge whose label starts with canonical[i], if any.
+            TrieEdge<V> edge = node.get(canonical.charAt(i));
+            if (edge == null) {
+                // 1) Additive insert: hang the rest of the key off this node.
+                node.put(canonical.substring(i), new TrieNode<V>(value));
+                return null;
+            }
+            String label = edge.getLabel();
+            int j = match(canonical, i, keyLength, label);
+            if (j < 0) {
+                // The whole label matches; descend and keep searching.
+                node = edge.getChild();
+                i += label.length();
+                continue;
+            }
+            // 2) The key shares only label[0..j - 1] with this edge, so the
+            // edge is cut in two at j:
+            //
+            //   node --label--> child
+            // becomes
+            //   node --head--> intermediate --tail--> child
+            //
+            // and, if part of the key is left over ("rest"), intermediate
+            // also gets a new edge "rest" to a node holding the value
+            // (split). If nothing is left over, intermediate itself takes
+            // the value (splice).
+            TrieNode<V> child = edge.getChild();
+            TrieNode<V> intermediate = new TrieNode<V>();
+            String head = label.substring(0, j);
+            String tail = label.substring(j);
+            String rest = canonical.substring(i + j);
+            node.remove(label.charAt(0));
+            node.put(head, intermediate);
+            intermediate.put(tail, child);
+            if (rest.length() > 0)
+                intermediate.put(rest, new TrieNode<V>(value));
+            else
+                intermediate.setValue(value);
+            return null;
+        }
+        // 3) Relabel insert: the key's node already exists, though it does
+        // not necessarily hold a value.
+        V previous = node.getValue();
+        node.setValue(value);
+        return previous;
+    }
+
+    /**
+     * Returns the node associated with prefix, or null if none. (internal)
+     */
+    private TrieNode<V> fetch(String prefix) {
+        // This private method uses prefixes already in canonical form.
+        TrieNode<V> node = root;
+        for (int i = 0; i < prefix.length();) {
+            // Find the edge whose label starts with prefix[i].
+            TrieEdge<V> edge = node.get(prefix.charAt(i));
+            if (edge == null)
+                return null;
+            // Now check that rest of label matches.
+            String label = edge.getLabel();
+            int j = match(prefix, i, prefix.length(), label);
+            if (j != -1)
+                return null;
+            i += label.length();
+            node = edge.getChild();
+        }
+        return node;
+    }
+
+    /**
+     * Returns the value associated with the given key, or null if none.
+     * The key is canonicalized before the lookup.
+     *
+     * @return the <tt>Object</tt> value or <tt>null</tt>
+     */
+    public Object get(String key) {
+        // Locate the node for the canonical key; it may not exist.
+        TrieNode<V> node = fetch(canonicalCase(key));
+        return node == null ? null : node.getValue();
+    }
+
+    /**
+     * Ensures no value is associated with the given key. The key is
+     * canonicalized before the lookup. The node itself stays in the tree.
+     *
+     * @return <tt>true</tt> if a value was actually removed
+     * @modifies this
+     */
+    public boolean remove(String key) {
+        TrieNode<V> node = fetch(canonicalCase(key));
+        if (node == null)
+            return false;
+        // Only the value is cleared.
+        // TODO: prune unneeded nodes to save space
+        boolean hadValue = node.getValue() != null;
+        node.setValue(null);
+        return hadValue;
+    }
+
+    /**
+     * Returns an iterator (of Object) over the values mapped by keys in this
+     * that start with the given prefix, in any order. That is, the returned
+     * iterator contains exactly the values v for which there exists a key k
+     * so that k.startsWith(prefix) and get(k) == v. The remove() operation on
+     * the iterator is unimplemented.
+     *
+     * @requires this not modified while iterator in use
+     */
+    public Iterator getPrefixedBy(String prefix) {
+        // Canonicalize first; the length is taken afterwards because
+        // canonicalization MAY have changed it.
+        final String canonical = canonicalCase(prefix);
+        return getPrefixedBy(canonical, 0, canonical.length());
+    }
+
+    /**
+     * Same as getPrefixedBy(prefix.substring(startOffset, stopOffset). This is
+     * useful as an optimization in certain applications to avoid allocations.
+     * <p>
+     * 
+     * Important: canonicalization of prefix substring is NOT performed here!
+     * But it can be performed early on the whole buffer using the public method
+     * <tt>canonicalCase(String)</tt> of this.
+     * 
+     * @requires 0 &lt;= startOffset &lt;= stopOffset &lt;= prefix.length
+     * @see #canonicalCase(String)
+     */
+    public Iterator getPrefixedBy(String prefix, int startOffset, int stopOffset) {
+        // Find the first node for which "prefix" prefixes KEY(node). (See the
+        // implementation overview for a definition of KEY(node).) This code is
+        // similar to fetch(prefix), except that if prefix extends into the
+        // middle of an edge label, that edge's child is considered a match.
+        TrieNode node = root;
+        for (int i = startOffset; i < stopOffset;) {
+            // Find the edge whose label starts with prefix[i].
+            TrieEdge edge = node.get(prefix.charAt(i));
+            if (edge == null) {
+                return EMPTY_ITERATOR;
+            }
+            // Now check that rest of label matches
+            node = edge.getChild();
+            String label = edge.getLabel();
+            int j = match(prefix, i, stopOffset, label);
+            if (i + j == stopOffset) {
+                // a) prefix overlaps perfectly with just part of edge label
+                break;
+            } else if (j >= 0) {
+                // b) prefix and label differ at some point
+                node = null;
+                break;
+            } else {
+                // c) prefix overlaps perfectly with all of edge label.
+            }
+            i += label.length();
+        }
+        // Yield all children of node, including node itself.
+        if (node == null)
+            return EMPTY_ITERATOR;
+        else
+            return new ValueIterator(node);
+    }
+
+    /**
+     * Returns all the (non-null) values associated with a given node and its
+     * children. (internal)
+     * <p>
+     * Built on NodeIterator with withNulls == false; each node produced by
+     * the superclass is unwrapped to its value.
+     */
+    private class ValueIterator extends NodeIterator {
+        ValueIterator(TrieNode start) {
+            super(start, false);
+        }
+
+        // Returns the value of the next node instead of the node itself.
+        public Object next() {
+            return ((TrieNode) super.next()).getValue();
+        }
+    }
+
+    /**
+     * Yields nothing. (internal)
+     */
+    private static class EmptyIterator implements Iterator {
+        // Always false: there are no elements.
+        public boolean hasNext() {
+            return false;
+        }
+
+        // Always fails: there are no elements.
+        public Object next() {
+            throw new NoSuchElementException();
+        }
+
+        /**
+         * Removal is not supported. Per the Iterator contract an unsupported
+         * remove() throws rather than silently doing nothing.
+         *
+         * @throws UnsupportedOperationException always
+         */
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+    }
+
+    /**
+     * Depth-first iterator over a node and the nodes below it. With
+     * withNulls == false only nodes that hold a value are produced; with
+     * withNulls == true every node is produced.
+     */
+    public class NodeIterator extends UnmodifiableIterator {
+        /**
+         * Stack for DFS. Push and pop from back. The last element of stack is
+         * the next node that will be returned.
+         * <p>
+         *
+         * INVARIANT: When not empty, the top of the stack is the next node to
+         * return (a node with a non-null value unless withNulls is set). All
+         * other elements in the stack are iterators over unvisited siblings.
+         */
+        private ArrayList /* of Iterator of TrieNode */stack = new ArrayList();
+        private boolean withNulls;
+
+        /**
+         * Creates a new iterator that yields start and the nodes below it.
+         *
+         * @param start the node to begin at
+         * @param withNulls if false, nodes without a value are skipped; if
+         *        true, every node is yielded
+         */
+        public NodeIterator(TrieNode start, boolean withNulls) {
+            this.withNulls = withNulls;
+            if (withNulls || start.getValue() != null)
+                // start itself qualifies, push it for next
+                stack.add(start);
+            else
+                // scan node children to find the next node
+                advance(start);
+        }
+
+        // True while a qualifying node is waiting on top of the stack.
+        public boolean hasNext() {
+            return !stack.isEmpty();
+        }
+
+        // Pops the pending node and lines up its successor before returning.
+        public Object next() {
+            int size;
+            if ((size = stack.size()) == 0)
+                throw new NoSuchElementException();
+            TrieNode node = (TrieNode) stack.remove(size - 1);
+            advance(node);
+            return node;
+        }
+
+        /**
+         * Scan the tree (top-down) starting at the already visited node until
+         * finding the next node that qualifies for next(). Keep unvisited
+         * nodes in a stack of sibling iterators. Leaves either an empty
+         * stack, or a stack whose top is the next node returned by next().
+         */
+        private void advance(TrieNode node) {
+            Iterator children = node.childrenForward();
+            while (true) { // scan siblings and their children
+                int size;
+                if (children.hasNext()) {
+                    node = (TrieNode) children.next();
+                    if (children.hasNext()) // save siblings
+                        stack.add(children);
+                    // A node is skipped (and its children scanned) only when
+                    // it has no value AND null-valued nodes are not wanted.
+                    // The previous test, withNulls || value == null, skipped
+                    // every node when withNulls was set, so nothing below
+                    // the start node was ever yielded.
+                    if (!withNulls && node.getValue() == null)
+                        children = node.childrenForward(); // loop from there
+                    else { // node qualifies for next()
+                        stack.add(node);
+                        return; // next node exists
+                    }
+                } else if ((size = stack.size()) == 0)
+                    return; // no next node
+                else
+                    // no more siblings, return to parent
+                    children = (Iterator) stack.remove(size - 1);
+            }
+        }
+    }
+
+    /**
+     * Returns a string representation of the tree state of this, i.e., the
+     * concrete state: one line per node, indented by depth, showing the edge
+     * label and, where present, the node's value.
+     */
+    public String toString() {
+        // The builder never leaves this call, so the unsynchronized
+        // StringBuilder is sufficient.
+        StringBuilder buf = new StringBuilder();
+        buf.append("<root>"); //$NON-NLS-1$
+        toStringHelper(root, buf, 1);
+        return buf.toString();
+    }
+
+    /**
+     * Prints a description of the subtree starting with start to buf. The
+     * printing starts with the given indent level. (internal)
+     */
+    private void toStringHelper(TrieNode start, StringBuilder buf, int indent) {
+        // Print value of node.
+        if (start.getValue() != null) {
+            buf.append(" -> "); //$NON-NLS-1$
+            buf.append(start.getValue().toString());
+        }
+        buf.append("\n"); //$NON-NLS-1$
+        // For each child...
+        for (Iterator iter = start.labelsForward(); iter.hasNext();) {
+            // Indent child appropriately.
+            for (int i = 0; i < indent; i++)
+                buf.append(" "); //$NON-NLS-1$
+            // Print edge.
+            String label = (String) iter.next();
+            buf.append(label);
+            // Recurse to print value.
+            TrieNode child = start.get(label.charAt(0)).getChild();
+            toStringHelper(child, buf, indent + 1);
+        }
+    }
+}
+
+/**
+ * A node of the Trie. Each node has a list of children, labelled by strings.
+ * Each of these [String label, TrieNode child] pairs is considered an "edge".
+ * The first character of each label must be distinct. When managing children,
+ * different implementations may trade space for time. Each node also stores an
+ * arbitrary Object value.
+ * <p>
+ * 
+ * Design note: this is a "dumb" class. It is <i>only</i> responsible for
+ * managing its value and its children. None of its operations are recursive;
+ * that is Trie's job. Nor does it deal with case.
+ */
+final class TrieNode<V> {
+    /**
+     * The value of this node, or null if it has none.
+     */
+    private V value = null;
+
+    /**
+     * The list of child edges. Children are stored in a sorted ArrayList
+     * because it is more compact than a tree or linked list. Insertions and
+     * deletions are more expensive, but they are rare compared to searching.
+     * <p>
+     *
+     * INVARIANT: children are sorted by distinct first characters of edges,
+     * i.e., for all i &lt; j,<br>
+     * children[i].edge.charAt(0) &lt; children[j].edge.charAt(0)
+     */
+    private ArrayList<TrieEdge<V>> /* of TrieEdge */children = new ArrayList<TrieEdge<V>>(0);
+
+    /**
+     * Creates a node with no children and no value.
+     */
+    public TrieNode() {
+    }
+
+    /**
+     * Creates a node with no children and the given value.
+     *
+     * @param value the value to store in this node
+     */
+    public TrieNode(V value) {
+        this.value = value;
+    }
+
+    /**
+     * Gets the value associated with this node, or null if none.
+     *
+     * @return the stored value, possibly null
+     */
+    public V getValue() {
+        return value;
+    }
+
+    /**
+     * Sets the value associated with this node, replacing any previous one.
+     *
+     * @param value the new value; null leaves the node without a value
+     */
+    public void setValue(V value) {
+        this.value = value;
+    }
+
+    /**
+     * Get the nth child edge of this node, by position in the children list.
+     *
+     * @requires 0 &lt;= i &lt; children.size()
+     */
+    private final TrieEdge<V> get(int i) {
+        return children.get(i);
+    }
+
+    /**
+     * (internal) If exact, returns the unique i so that:
+     * children[i].getLabelStart() == c<br>
+     * If !exact, returns the largest i so that: children[i].getLabelStart()
+     * &lt;= c<br>
+     * In either case, returns -1 if no such i exists.
+     * <p>
+     * 
+     * This method uses binary search and runs in O(log N) time, where N =
+     * children.size().<br>
+     * The standard Java binary search methods could not be used because they
+     * only return exact matches. Also, they require allocating a dummy Trie.
+     * 
+     * Example1: Search non exact c == '_' in {[0] => 'a...', [1] => 'c...'};
+     * start loop with low = 0, high = 1; middle = 0, cmiddle == 'a', c <
+     * cmiddle, high = 0 (low == 0); middle = 0, cmiddle == 'a', c < cmiddle,
+     * high = -1 (low == 0); end loop; return high == -1 (no match, insert at
+     * 0). Example2: Search non exact c == 'a' in {[0] => 'a', [1] => 'c'} start
+     * loop with low = 0, high = 1; middle = 0, cmiddle == 'a', c == cmiddle,
+     * abort loop by returning middle == 0 (exact match). Example3: Search non
+     * exact c == 'b' in {[0] => 'a...', [1] => 'c...'}; start loop with low =
+     * 0, high = 1; middle = 0, cmiddle == 'a', cmiddle < c, low = 1 (high ==
+     * 1); middle = 1, cmiddle == 'c', c < cmiddle, high = 0 (low == 1); end
+     * loop; return high == 0 (no match, insert at 1). Example4: Search non
+     * exact c == 'c' in {[0] => 'a...', [1] => 'c...'}; start loop with low =
+     * 0, high = 1; middle = 0, cmiddle == 'a', cmiddle < c, low = 1 (high ==
+     * 1); middle = 1, cmiddle == 'c', c == cmiddle, abort loop by returning
+     * middle == 1 (exact match). Example5: Search non exact c == 'd' in {[0] =>
+     * 'a...', [1] => 'c...'}; start loop with low = 0, high = 1; middle = 0,
+     * cmiddle == 'a', cmiddle < c, low = 1 (high == 1); middle = 1, cmiddle ==
+     * 'c', cmiddle < c, low = 2 (high == 1); end loop; return high == 1 (no
+     * match, insert at 2).
+     */
+    private final int search(char c, boolean exact) {
+        // Binary search over the first characters of the child edge labels.
+        // (The original notes this was adapted from IntSet.search.)
+        int lo = 0;
+        int hi = children.size() - 1;
+        while (lo <= hi) {
+            int mid = (lo + hi) / 2;
+            char probe = get(mid).getLabelStart();
+            if (probe == c)
+                return mid; // exact match
+            if (probe < c)
+                lo = mid + 1;
+            else
+                hi = mid - 1;
+        }
+        // Not found. On exit hi is the index of the closest lower entry
+        // (or -1 if there is none), which is what a non-exact search wants.
+        return exact ? -1 : hi;
+    }
+
+    /**
+     * Returns the edge (at most one) whose label starts with the given
+     * character, or null if there is no such edge.
+     */
+    public TrieEdge<V> get(char labelStart) {
+        int index = search(labelStart, true);
+        return index < 0 ? null : get(index);
+    }
+
+    /**
+     * Inserts an edge with the given label to the given child to this. Keeps
+     * all edges binary sorted by their label start.
+     *
+     * @requires label not empty.
+     * @requires no edge E already in this has E.getLabelStart() ==
+     *           label.charAt(0).
+     * @modifies this
+     */
+    public void put(String label, TrieNode<V> child) {
+        // A non-exact search returns the index of the closest lower-or-equal
+        // edge; the precondition rules out "equal", so the new edge belongs
+        // directly after it. When every existing edge is greater, or there
+        // are no edges yet, search returns -1 and the new edge lands at
+        // index 0.
+        final int closestLower = search(label.charAt(0), false);
+        children.add(closestLower + 1, new TrieEdge<V>(label, child));
+    }
+
+    /**
+     * Deletes the edge whose label begins with the given character, if this
+     * node has one (there is at most one such edge).
+     *
+     * @param labelStart first character of the label of the edge to delete
+     * @return true if an edge was removed, false if none matched
+     */
+    public boolean remove(char labelStart) {
+        final int index = search(labelStart, true);
+        final boolean found = index >= 0;
+        if (found) {
+            children.remove(index);
+        }
+        return found;
+    }
+
+    /**
+     * Shrinks the storage backing this node's edge list to the number of
+     * edges it currently holds. Intended to be called once many edges have
+     * been inserted.
+     *
+     * @modifies this
+     */
+    public void trim() {
+        this.children.trimToSize();
+    }
+
+    /**
+     * Iterates over the child nodes of this node, in the order in which
+     * their edges are stored.
+     *
+     * @return an iterator whose elements are the TrieNode children; its
+     *         remove() is unsupported
+     */
+    public Iterator childrenForward() {
+        final Iterator forward = new ChildrenForwardIterator();
+        return forward;
+    }
+
+    /**
+     * Walks the edge list front to back and yields edge.getChild() for each
+     * edge.
+     */
+    private class ChildrenForwardIterator extends UnmodifiableIterator {
+        /** Index of the edge whose child is handed out next. */
+        int i = 0;
+
+        public boolean hasNext() {
+            return i < children.size();
+        }
+
+        public Object next() {
+            if (i >= children.size()) {
+                throw new NoSuchElementException();
+            }
+            final TrieEdge<V> edge = get(i++);
+            return edge.getChild();
+        }
+    }
+
+    /**
+     * Returns the children of this in backward order, as an iterator of
+     * TrieNode.
+     */
+    /*
+     * public Iterator childrenBackward() { return new
+     * ChildrenBackwardIterator(); }
+     */
+
+    /**
+     * Maps (lambda(edge) edge.getChild) on children.iteratorBackward().
+     */
+    /*
+     * private class ChildrenBackwardIterator extends UnmodifiableIterator { int
+     * i = children.size() - 1;
+     * 
+     * public boolean hasNext() { return i >= 0; }
+     * 
+     * public Object next() { if (i >= 0) return get(i--).getChild(); throw new
+     * NoSuchElementException(); } }
+     */
+
+    /**
+     * Iterates over the labels of this node's edges, in the order in which
+     * the edges are stored.
+     *
+     * @return an iterator whose elements are the String labels; its remove()
+     *         is unsupported
+     */
+    public Iterator labelsForward() {
+        final Iterator forward = new LabelForwardIterator();
+        return forward;
+    }
+
+    /**
+     * Walks the edge list front to back and yields edge.getLabel() for each
+     * edge.
+     */
+    private class LabelForwardIterator extends UnmodifiableIterator {
+        /** Index of the edge whose label is handed out next. */
+        int i = 0;
+
+        public boolean hasNext() {
+            return i < children.size();
+        }
+
+        public Object next() {
+            if (i >= children.size()) {
+                throw new NoSuchElementException();
+            }
+            final TrieEdge<V> edge = get(i++);
+            return edge.getLabel();
+        }
+    }
+
+    /**
+     * Returns the labels of the children of this in backward order, as an
+     * iterator of Strings.
+     */
+    /*
+     * public Iterator labelsBackward() { return new LabelBackwardIterator(); }
+     */
+
+    /**
+     * Maps (lambda(edge) edge.getLabel) on children.iteratorBackward()
+     */
+    /*
+     * private class LabelBackwardIterator extends UnmodifiableIterator { int i =
+     * children.size() - 1;
+     * 
+     * public boolean hasNext() { return i >= 0; }
+     * 
+     * public Object next() { if (i >= 0) return get(i--).getLabel(); throw new
+     * NoSuchElementException(); } }
+     */
+
+    /**
+     * Returns the string form of this node's value, or the literal "NULL"
+     * when getValue() yields null.
+     */
+    public String toString() {
+        final Object value = getValue();
+        return (value == null) ? "NULL" : value.toString(); //$NON-NLS-1$
+    }
+
+    /**
+     * Unit test.
+     * 
+     * @see TrieNodeTest
+     */
+}
+
+/**
+ * A labelled edge, i.e., a String label and a TrieNode endpoint. Both parts
+ * are fixed at construction time and never change afterwards.
+ */
+final class TrieEdge<V> {
+    private final String label;
+    private final TrieNode<V> child;
+
+    /**
+     * @param label text on this edge
+     * @param child node this edge leads to
+     * @requires label.length() > 0
+     * @requires child != null
+     */
+    TrieEdge(String label, TrieNode<V> child) {
+        this.label = label;
+        this.child = child;
+    }
+
+    /**
+     * Returns the complete label of this edge.
+     */
+    public String getLabel() {
+        return label;
+    }
+
+    /**
+     * Returns the first character of the label, i.e., getLabel().charAt(0).
+     */
+    public char getLabelStart() {
+        // You could store this char as an optimization if needed.
+        return label.charAt(0);
+    }
+
+    /**
+     * Returns the node this edge leads to.
+     */
+    public TrieNode<V> getChild() {
+        return child;
+    }
+
+}
\ No newline at end of file

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnhandledException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnhandledException.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnhandledException.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnhandledException.java Sun May  5 03:39:51 2013
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Unchecked exception that carries another {@link Throwable} as its cause.
+ */
+public class UnhandledException extends RuntimeException {
+
+    /**
+     * Required for serialization support.
+     *
+     * @see java.io.Serializable
+     */
+    private static final long serialVersionUID = 1832101364842773720L;
+
+    /**
+     * Creates the exception from an explanatory message plus the cause.
+     *
+     * @param message  the message to use
+     * @param cause  the underlying cause
+     */
+    public UnhandledException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * Creates the exception from the cause alone.
+     *
+     * @param cause  the underlying cause
+     */
+    public UnhandledException(Throwable cause) {
+        super(cause);
+    }
+
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnmodifiableIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnmodifiableIterator.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnmodifiableIterator.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/UnmodifiableIterator.java Sun May  5 03:39:51 2013
@@ -0,0 +1,28 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Iterator;
+
+/**
+ * Base class for iterators that do not support removal: subclasses supply
+ * hasNext() and next(), while remove() always fails.
+ */
+public abstract class UnmodifiableIterator implements Iterator {
+
+    private static final String REMOVE_UNSUPPORTED = "Cannot remove from this iterator";
+
+    /**
+     * Always throws, because removal is not supported.
+     *
+     * @throws UnsupportedOperationException on every call
+     */
+    public void remove() {
+        throw new UnsupportedOperationException(REMOVE_UNSUPPORTED);
+    }
+
+}
\ No newline at end of file

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Utilities.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Utilities.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Utilities.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/Utilities.java Sun May  5 03:39:51 2013
@@ -0,0 +1,140 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.kr.morph.AnalysisOutput;
+import org.apache.lucene.analysis.kr.morph.MorphException;
+import org.apache.lucene.analysis.kr.morph.PatternConstants;
+
+public class Utilities {
+
+	/**
+	 * Concatenates all elements of the array, in order, with no separator.
+	 * A null element contributes the four characters "null".
+	 *
+	 * @param strs the strings to join; must not itself be null
+	 * @return the concatenation, or the empty string for an empty array
+	 */
+	public static String arrayToString(String[] strs) {
+		// StringBuilder: the buffer never leaves this method, so the
+		// per-call locking of StringBuffer buys nothing.
+		StringBuilder sb = new StringBuilder();
+		for (String str : strs) {
+			sb.append(str);
+		}
+		return sb.toString();
+	}
+	
+	/**
+	 * Returns o.clone(), translating a failed clone into a MorphException.
+	 *
+	 * @param o the analysis result to copy
+	 * @return the clone produced by o.clone()
+	 * @throws MorphException wrapping the CloneNotSupportedException (same
+	 *         message, original exception as cause) if cloning fails
+	 */
+	public static AnalysisOutput cloneOutput(AnalysisOutput o) throws MorphException {
+		final AnalysisOutput copy;
+		try {
+			copy = o.clone();
+		} catch (CloneNotSupportedException e) {
+			throw new MorphException(e.getMessage(),e);
+		}
+		return copy;
+	}
+	
+	/**
+	 * Renders an analysis result as a comma-separated list of parts, each
+	 * part formatted by MorphUtil.buildTypeString(text, pos).
+	 * <p>
+	 * The stem (with the result's own POS) always comes first, followed by
+	 * the noun suffix when o.getNsfx() is non-null. The remaining parts
+	 * depend on o.getPatn(); a pattern not listed below adds nothing more.
+	 * The pomi part is emitted only when o.getPomi() is non-null.
+	 *
+	 * @param o the analysis result to render
+	 * @return the comma-separated description
+	 */
+	public static String buildOutputString(AnalysisOutput o) {
+		
+
+		StringBuffer buff = new StringBuffer();
+	
+		buff.append(MorphUtil.buildTypeString(o.getStem(),o.getPos()));
+		if(o.getNsfx()!=null)
+			buff.append(",").append(MorphUtil.buildTypeString(o.getNsfx(),PatternConstants.POS_SFX_N));
+		
+		// PTN_NJ / PTN_ADVJ: josa
+		if(o.getPatn()==PatternConstants.PTN_NJ || o.getPatn()==PatternConstants.PTN_ADVJ) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getJosa(),PatternConstants.POS_JOSA));
+		// PTN_NSM: vsfx, [pomi], eomi
+		}else if(o.getPatn()==PatternConstants.PTN_NSM) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getVsfx(),PatternConstants.POS_SFX_V));
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getEomi(),PatternConstants.POS_EOMI));
+		// PTN_NSMJ: vsfx, elist[0], josa
+		}else if(o.getPatn()==PatternConstants.PTN_NSMJ) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getVsfx(),PatternConstants.POS_SFX_V));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_NEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getJosa(),PatternConstants.POS_JOSA));
+		// PTN_NSMXM: vsfx, elist[0], xverb, [pomi], eomi
+		}else if(o.getPatn()==PatternConstants.PTN_NSMXM) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getVsfx(),PatternConstants.POS_SFX_V));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_COPULA));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getXverb(),PatternConstants.POS_XVERB));
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getEomi(),PatternConstants.POS_EOMI));
+		// PTN_NJCM: josa, elist[0], [pomi], eomi
+		}else if(o.getPatn()==PatternConstants.PTN_NJCM) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getJosa(),PatternConstants.POS_JOSA));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_SFX_V));
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getEomi(),PatternConstants.POS_EOMI));
+		// PTN_NSMXMJ: vsfx, elist[1], xverb, [pomi], elist[0], josa
+		}else if(o.getPatn()==PatternConstants.PTN_NSMXMJ) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getVsfx(),PatternConstants.POS_SFX_V));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(1),PatternConstants.POS_COPULA));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getXverb(),PatternConstants.POS_XVERB));
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_NEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getJosa(),PatternConstants.POS_JOSA));
+		// PTN_VM: [pomi], eomi
+		}else if(o.getPatn()==PatternConstants.PTN_VM) {
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getEomi(),PatternConstants.POS_EOMI));
+		// PTN_VMJ: elist[0], josa
+		}else if(o.getPatn()==PatternConstants.PTN_VMJ) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_NEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getJosa(),PatternConstants.POS_JOSA));
+		// PTN_VMCM: elist[0], elist[1], [pomi], eomi
+		}else if(o.getPatn()==PatternConstants.PTN_VMCM) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_NEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(1),PatternConstants.POS_SFX_N));
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getEomi(),PatternConstants.POS_EOMI));
+		// PTN_VMXM: elist[0], xverb, [pomi], eomi
+		}else if(o.getPatn()==PatternConstants.PTN_VMXM) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_COPULA));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getXverb(),PatternConstants.POS_XVERB));
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getEomi(),PatternConstants.POS_EOMI));
+		// PTN_VMXMJ: elist[1], xverb, [pomi], elist[0], josa
+		}else if(o.getPatn()==PatternConstants.PTN_VMXMJ) {
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(1),PatternConstants.POS_COPULA));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getXverb(),PatternConstants.POS_XVERB));
+			if(o.getPomi()!=null) 
+				buff.append(",").append(MorphUtil.buildTypeString(o.getPomi(),PatternConstants.POS_PEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getElist().get(0),PatternConstants.POS_NEOMI));
+			buff.append(",").append(MorphUtil.buildTypeString(o.getJosa(),PatternConstants.POS_JOSA));
+		}
+		return buff.toString();
+		
+	}
+	
+   // -----------------------------------------------------------------------
+   /**
+    * <p>
+    * Gets a System property, defaulting to <code>null</code> if the property cannot be read.
+    * </p>
+    * 
+    * <p>
+    * If a <code>SecurityException</code> is caught, the return value is <code>null</code> and a message is written to
+    * <code>System.err</code>.
+    * </p>
+    * 
+    * @param property
+    *            the system property name
+    * @return the system property value or <code>null</code> if a security problem occurs
+    */
+   public static String getSystemProperty(String property) {
+       try {
+           return System.getProperty(property);
+       } catch (SecurityException ex) {
+           // we are not allowed to look at this property
+           System.err.println("Caught a SecurityException reading the system property '" + property
+                   + "'; the SystemUtils property value will default to null.");
+           return null;
+       }
+   }  
+	   
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/VerbUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/VerbUtil.java?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/VerbUtil.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/VerbUtil.java Sun May  5 03:39:51 2013
@@ -0,0 +1,305 @@
+package org.apache.lucene.analysis.kr.utils;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.kr.morph.AnalysisOutput;
+import org.apache.lucene.analysis.kr.morph.MorphException;
+import org.apache.lucene.analysis.kr.morph.PatternConstants;
+import org.apache.lucene.analysis.kr.morph.WordEntry;
+
+public class VerbUtil {
+
+	/** Verbalizing suffixes; every entry maps the suffix to itself. */
+	public static final Map verbSuffix = new HashMap();
+	
+	/** Auxiliary verbs; every entry maps the verb to itself. */
+	public static final Map XVerb = new HashMap();
+	
+	static {
+		final String[] suffixs = {
+				  "이","하","되","내", "나", "스럽","시키","있","없","같","당하","만하","드리","받","짓"};
+		for (String suffix : suffixs) {
+			verbSuffix.put(suffix, suffix);
+		}
+		
+		final String[] xverbs = {"오","내","주","보","지","오르","올리"};
+		for (String xverb : xverbs) {
+			XVerb.put(xverb, xverb);
+		}
+	}
+	
+	/**
+	 * Finds where a trailing verbalizing suffix begins in the stem.
+	 * <p>
+	 * Only suffixes of one or two characters are considered, and at least one
+	 * character must remain in front of the suffix. The two-character
+	 * candidate is tried before the one-character one.
+	 *
+	 * @param stem the stem to inspect
+	 * @return the index in stem at which the suffix starts, or -1 if the stem
+	 *         is shorter than two characters or ends with no known suffix
+	 */
+   public static int endsWithVerbSuffix(String stem) {
+       final int len = stem.length();
+       if (len < 2) {
+           return -1;
+       }
+       // The longest suffix is two characters, but it may not swallow the whole stem.
+       final int longest = (len == 2) ? 1 : 2;
+       for (int cut = len - longest; cut < len; cut++) {
+           if (verbSuffix.get(stem.substring(cut)) != null) {
+               return cut;
+           }
+       }
+       return -1;
+   }
+   
+   /**
+    * Checks whether the stem ends with one of the auxiliary verbs registered
+    * in XVerb and, if so, reports where that verb begins.
+    * <p>
+    * Only endings of one or two characters are considered, and at least one
+    * character must remain in front of the verb. The two-character candidate
+    * is tried before the one-character one.
+    *
+    * @param stem the stem to inspect
+    * @return the index in stem at which the auxiliary verb starts, or -1 if
+    *         the stem is shorter than two characters or ends with none
+    */
+   public static int endsWithXVerb(String stem) {
+       final int len = stem.length();
+       if (len < 2) {
+           return -1;
+       }
+       // The longest auxiliary verb is two characters long.
+       int tail = (len == 2) ? 1 : 2;
+       while (tail > 0) {
+           final int begin = len - tail;
+           if (XVerb.get(stem.substring(begin)) != null) {
+               return begin;
+           }
+           tail--;
+       }
+       return -1;
+   }
+   
+   /**
+    * Tells whether the whole given string is one of the verbalizing suffixes
+    * registered in the verbSuffix table.
+    *
+    * @param stem the string to look up
+    * @return true if the table holds a non-null entry for it
+    */
+   public static boolean verbSuffix(String stem) {
+       final Object registered = verbSuffix.get(stem);
+       return registered != null;
+   }
+   
+   /**
+    * Checks a joining constraint between two adjacent pieces of text.
+    * <p>
+    * The last character of start and the first character of end are run
+    * through MorphUtil.decompose (presumably a split into Hangul jamo;
+    * confirm in MorphUtil). The combination is rejected when the first
+    * decomposes into exactly three elements whose third is 'ㄹ' while the
+    * first element of the second is also 'ㄹ'.
+    *
+    * @param start the left piece; must not be empty
+    * @param end the right piece; must not be empty
+    * @return false for the rejected combination described above, true otherwise
+    */
+   public static boolean constraintVerb(String start, String end) {
+       final char[] lastOfStart = MorphUtil.decompose(start.charAt(start.length()-1));
+       final char[] firstOfEnd = MorphUtil.decompose(end.charAt(0));
+
+       final boolean rieulMeetsRieul =
+               lastOfStart.length==3 && lastOfStart[2]=='ㄹ' && firstOfEnd[0]=='ㄹ';
+       return !rieulMeetsRieul;
+   }
+   
+   /**
+    * Pattern 3 (PTN_NJCM), e.g. "학교에서이다":
+    * noun + '에서' / '부터' / '에서부터' + '이' + ending. <br>
+    * <p>
+    * If the stem of o ends with one of those particles followed by '이', the
+    * trailing '이' is appended to the elist of o, the particle is stored as
+    * its josa, and the stem is cut down to what precedes the particle. The
+    * score is set to SCORE_CORRECT only when the shortened stem is found by
+    * DictionaryUtil.getNoun; o is added to candidates either way.
+    *
+    * @param o the analysis result to examine; modified in place on a match
+    * @param candidates list that receives o on a match
+    * @return true if the stem matched and o was added to candidates,
+    *         false (with o untouched) otherwise
+    * @throws MorphException declared by this method; presumably raised by the
+    *         dictionary lookup
+    */
+   public static boolean ananlysisNJCM(AnalysisOutput o, List candidates) throws MorphException {
+
+       final String stem = o.getStem();
+       final int strlen = stem.length();
+
+       // Length of the particle standing in front of the final '이'. The
+       // four-character particle has to be tested first: every stem ending in
+       // "에서부터이" also ends in "부터이", so testing the short forms first
+       // would split off only "부터" and leave "에서" glued to the noun.
+       final int josaLen;
+       if(strlen>5&&stem.endsWith("에서부터이")) {
+           josaLen = 4;
+       }else if(strlen>3&&(stem.endsWith("에서이")||stem.endsWith("부터이"))) {
+           josaLen = 2;
+       }else {
+           return false;
+       }
+
+       final int josaStart = strlen-1-josaLen;
+       o.addElist(stem.substring(strlen-1));
+       o.setJosa(stem.substring(josaStart,strlen-1));
+       o.setStem(stem.substring(0,josaStart));
+
+       if(DictionaryUtil.getNoun(o.getStem())!=null) {
+           o.setScore(AnalysisOutput.SCORE_CORRECT);
+       }
+
+       o.setPatn(PatternConstants.PTN_NJCM);
+       o.setPos(PatternConstants.POS_NOUN);
+       candidates.add(o);
+
+       return true;
+   }
+   
+   /**
+    * With the ending already split off from the stem, checks whether the
+    * stem can be read as noun + verbalizing suffix (PTN_NSM).
+    * <p>
+    * A stem ending in "스러우" is first rewritten to end in "스럽". The stem
+    * is then split at the index reported by endsWithVerbSuffix into the new
+    * stem and the vsfx of o. Note that o has already been modified (stem,
+    * vsfx, pattern, POS) when one of the dictionary feature checks makes
+    * this method return false.
+    *
+    * @param o	the result in which ending and stem have been separated; modified in place
+    * @param candidates list that receives o on success
+    * @return true if o was added to candidates, false otherwise
+    * @throws MorphException declared by this method; presumably raised by the
+    *         dictionary lookup
+    */
+   public static boolean ananlysisNSM(AnalysisOutput o, List candidates) throws MorphException {
+
+	    if(o.getStem().endsWith("스러우")) o.setStem(o.getStem().substring(0,o.getStem().length()-3)+"스럽");
+		int idxVbSfix = VerbUtil.endsWithVerbSuffix(o.getStem());
+		// -1 means no suffix; index 0 would leave nothing in front of the suffix.
+		if(idxVbSfix<1) return false;
+	
+		o.setVsfx(o.getStem().substring(idxVbSfix));
+		o.setStem(o.getStem().substring(0,idxVbSfix));
+		o.setPatn(PatternConstants.PTN_NSM);
+		o.setPos(PatternConstants.POS_NOUN);
+		
+		WordEntry entry = DictionaryUtil.getWordExceptVerb(o.getStem());
+				
+//		if(entry==null&&NounUtil.confirmCNoun(o)&&o.getCNounList().size()>0)	{
+//			entry = DictionaryUtil.getNoun(o.getCNounList().get(o.getCNounList().size()-1).getWord());
+//		}
+
+//		if(entry==null) return false;
+//		if(entry==null) {
+//			NounUtil.confirmDNoun(o);
+//			if(o.getScore()!=AnalysisOutput.SCORE_CORRECT) return false;
+//		}
+
+		if(entry!=null) {
+			// A dictionary entry exists: its feature flags must allow the
+			// suffix that was split off, otherwise the reading is rejected.
+			if(entry.getFeature(WordEntry.IDX_NOUN)=='0') return false;
+			else if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
+			else if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
+			else if(o.getVsfx().equals("내")&&entry.getFeature(WordEntry.IDX_NE)!='1') return false;
+			o.setScore(AnalysisOutput.SCORE_CORRECT); // with '입니다' many unregistered words such as person names occur, so the analysis is assumed to have succeeded.
+		}else {
+			o.setScore(AnalysisOutput.SCORE_ANALYSIS); // with '입니다' many unregistered words such as person names occur, so the analysis is assumed to have succeeded.
+		}
+	
+		candidates.add(o);
+
+		return true;
+
+   }
+   
+   /**
+    * Tries to read the stem of o as noun + verbalizing suffix + ending +
+    * auxiliary verb (PTN_NSMXM).
+    * <p>
+    * The stem must end with an auxiliary verb (endsWithXVerb). What precedes
+    * it is handed to EomiUtil.splitEomi, and the first element of the result
+    * must in turn end with a verbalizing suffix. The remaining noun has to be
+    * found by DictionaryUtil.getNoun and, for the suffixes "하" and "되", carry
+    * the matching feature flag. Note that the elist of o is extended before
+    * the later checks, so o may already be modified when false is returned.
+    *
+    * @param o the analysis result to examine; modified in place
+    * @param candidates list that receives o on success
+    * @return true if o was added to candidates, false otherwise
+    * @throws MorphException declared by this method; presumably raised by the
+    *         helper lookups
+    */
+   public static boolean ananlysisNSMXM(AnalysisOutput o, List candidates) throws MorphException {
+   
+		int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
+		if(idxXVerb==-1) return false;
+		
+		// Everything in front of the auxiliary verb.
+		String eogan = o.getStem().substring(0,idxXVerb);
+		String[] stomis = null;
+
+		// A trailing "아"/"어" is passed as the second argument; otherwise the
+		// second argument is the empty string.
+		if((eogan.endsWith("아")||eogan.endsWith("어"))&&eogan.length()>1)
+			stomis = EomiUtil.splitEomi(eogan.substring(0,eogan.length()-1),eogan.substring(eogan.length()-1));
+		else
+			stomis = EomiUtil.splitEomi(eogan,"");
+
+		// presumably stomis[0] is the remaining stem and stomis[1] the ending; verify against EomiUtil
+		if(stomis[0]==null) return false;
+		
+		o.addElist(stomis[1]);
+		int idxVbSfix = VerbUtil.endsWithVerbSuffix(stomis[0]);
+		if(idxVbSfix==-1) return false;
+		
+		o.setXverb(o.getStem().substring(idxXVerb));
+		o.setVsfx(stomis[0].substring(idxVbSfix));
+		o.setStem(stomis[0].substring(0,idxVbSfix));
+		o.setPatn(PatternConstants.PTN_NSMXM);
+		o.setPos(PatternConstants.POS_NOUN);
+		WordEntry entry = DictionaryUtil.getNoun(o.getStem());
+//		if(entry==null&&NounUtil.confirmCNoun(o)&&o.getCNounList().size()>0)	{
+//			entry = DictionaryUtil.getNoun(o.getCNounList().get(o.getCNounList().size()-1));
+//		}
+		if(entry==null) return false;
+		
+		if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
+		if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
+		o.setScore(AnalysisOutput.SCORE_CORRECT);
+		
+		candidates.add(o);
+
+		
+	   return true;
+   }
+   
+   /**
+    * Tries to read the stem of o as verb + nominalizing ending + '이'
+    * (PTN_VMCM).
+    * <p>
+    * The stem must be at least two characters long and end with "이". Two
+    * shapes are accepted: a stem ending in "기이", or a stem whose
+    * second-to-last character decomposes (MorphUtil.decompose) into more than
+    * two elements with 'ㅁ' as the third. The result is kept only when the
+    * remaining stem is found by DictionaryUtil.getVerb. Note that o may
+    * already be modified when false is returned.
+    *
+    * @param o the analysis result to examine; modified in place
+    * @param candidates list that receives o on success
+    * @return true if o was added to candidates, false otherwise
+    * @throws MorphException declared by this method; presumably raised by the
+    *         helper lookups
+    */
+   public static boolean analysisVMCM(AnalysisOutput o, List candidates) throws MorphException {
+   
+	   int strlen = o.getStem().length();
+	   
+	   if(strlen<2) return false;
+	   
+	   if(!o.getStem().endsWith("이")) return false;
+	   
+	   // Decomposition of the character directly in front of the final "이".
+	   char[] chrs = MorphUtil.decompose(o.getStem().charAt(strlen-2));
+	   boolean success = false;
+	
+	   if(strlen>2&&o.getStem().endsWith("기이")) {
+		   // Strip "기이" and record "기" as the first ending.
+		   o.setStem(o.getStem().substring(0,strlen-2));
+		   o.addElist("기");
+		   success = true;
+	   } else if(chrs.length>2&&chrs[2]=='ㅁ'){
+		   // Split what precedes the final "이"; presumably eres[0] is the
+		   // stem and eres[1] the ending (verify against EomiUtil).
+		   String[] eres = EomiUtil.splitEomi(o.getStem().substring(0,strlen-1), "");
+			if(eres[0]==null) return false;
+			
+		   o.addElist(eres[1]);
+		   String[] irrs = IrregularUtil.restoreIrregularVerb(eres[0], eres[1]);
+		   
+		   // Prefer the restored form when restoreIrregularVerb returns one.
+		   if(irrs!=null) o.setStem(irrs[0]);
+		   else o.setStem(eres[0]);
+
+		   success = true;
+	   }
+	   
+	   if(success) {
+	   
+		   o.addElist("이");
+			if(DictionaryUtil.getVerb(o.getStem())!=null) {
+				o.setPos(PatternConstants.POS_VERB);
+				o.setPatn(PatternConstants.PTN_VMCM);
+				o.setScore(AnalysisOutput.SCORE_CORRECT);
+				candidates.add(o);
+				return true;
+			}
+	   }
+	   
+	   return false;
+	   
+   }
+   
+   /**
+    * 
+    * Pattern 6 (PTN_VMXM), e.g. "도와주다":
+    * verb + '아/어' + auxiliary verb + ending.
+    * <p>
+    * The stem must end with an auxiliary verb (endsWithXVerb), which is
+    * stored as the xverb of o. What precedes it is split by
+    * EomiUtil.splitEomi, and the result is kept only when the remaining stem
+    * is found by DictionaryUtil.getVerb. Note that o may already be modified
+    * (xverb, stem, elist) when false is returned.
+    * 
+    * @param o the analysis result to examine; modified in place
+    * @param candidates list that receives o on success
+    * @return true if o was added to candidates, false otherwise
+    * @throws MorphException declared by this method; presumably raised by the
+    *         helper lookups
+    */
+   public static boolean analysisVMXM(AnalysisOutput o, List candidates) throws MorphException {
+
+		int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
+
+		if(idxXVerb==-1) return false;
+			
+		o.setXverb(o.getStem().substring(idxXVerb));
+		
+		// Everything in front of the auxiliary verb.
+		String eogan = o.getStem().substring(0,idxXVerb);
+
+		String[] stomis = null;
+		if(eogan.endsWith("아")||eogan.endsWith("어")) {
+			// Explicit "아"/"어": pass it as the second argument.
+			stomis = EomiUtil.splitEomi(eogan.substring(0,eogan.length()-1),eogan.substring(eogan.length()-1));
+			if(stomis[0]==null) return false;
+		}else {
+			// Otherwise the second element of the split must itself start
+			// with "아" or "어".
+			stomis =  EomiUtil.splitEomi(eogan, "");
+			if(stomis[0]==null||!(stomis[1].startsWith("아")||stomis[1].startsWith("어"))) return false;
+		}
+
+		// Prefer the restored pair when restoreIrregularVerb returns one.
+		String[] irrs = IrregularUtil.restoreIrregularVerb(stomis[0], stomis[1]);
+		if(irrs!=null) {
+			o.setStem(irrs[0]);
+			o.addElist(irrs[1]);
+		} else {
+			o.setStem(stomis[0]);
+			o.addElist(stomis[1]);
+		}
+
+		if(DictionaryUtil.getVerb(o.getStem())!=null) {
+			o.setPos(PatternConstants.POS_VERB);
+			o.setPatn(PatternConstants.PTN_VMXM);
+			o.setScore(AnalysisOutput.SCORE_CORRECT);
+			candidates.add(o);
+			return true;
+		}
+
+		return false;
+   }
+    
+}

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/cj.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/cj.dic?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/cj.dic (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/cj.dic Sun May  5 03:39:51 2013
@@ -0,0 +1,2 @@
+###################
+金融:금융
\ No newline at end of file

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/compounds.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/compounds.dic?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/compounds.dic (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/compounds.dic Sun May  5 03:39:51 2013
@@ -0,0 +1,8 @@
+###################
+밤하늘:밤,하늘
+경전철:경,전철
+가서명:가,서명
+가입국:가,입국
+갓김치:갓,김치
+과소비:과,소비
+고투자율:고투자,투자,투자율
\ No newline at end of file

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/eomi.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/eomi.dic?rev=1479234&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/eomi.dic (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/kr/dic/eomi.dic Sun May  5 03:39:51 2013
@@ -0,0 +1,748 @@
+//#######
+거나
+거늘
+거니
+거니와
+거드면
+거드면은
+거든
+거들랑
+거들랑은
+건
+건대
+건댄
+건마는
+건만
+것다
+게
+게끔
+게나
+게나마
+게는
+게도
+게라도
+게만
+게만은
+게시리
+게요
+고
+고는
+고도
+고만
+고말고
+고서
+고서는
+고서도
+고선
+고야
+고요
+고자
+곤
+관데
+구나
+구려
+구료
+구먼
+군
+군요
+기
+기까지
+기까지는
+기까지도
+기까지만
+기까지만은
+기로
+기로서
+기로서니
+기로선들
+기에
+긴
+길
+나
+나니
+나마
+나요
+나이까
+나이다
+냐
+냐고
+냐는
+냐라고
+냐라고도
+냐라고만
+냐에
+네
+네만
+네요
+노
+노라
+노라고
+노라니
+노라면
+느냐
+느냐고
+느냐는
+느냐라고
+느냐라고는
+느냐라고도
+느냐라고만
+느냐라고만은
+느냐에
+느뇨
+느니
+느니라
+느니만
+느라
+느라고
+는
+는가
+는가라고
+는가라는
+는가를
+는가에
+는걸
+는고
+는구나
+는구려
+는구료
+는구먼
+는군
+는다
+는다거나
+는다고
+는다고는
+는다는
+는다는데
+는다니
+는다니까
+는다든지
+는다마는
+는다만
+는다만은
+는다며
+는다며는
+는다면
+는다면서
+는다면은
+는단다
+는담
+는답니까
+는답니다
+는답디까
+는답디다
+는답시고
+는대
+는대로
+는대서
+는대서야
+는대야
+는대요
+는데
+는데는
+는데다
+는데도
+는데서
+는만큼
+는만큼만
+는바
+는지
+는지가
+는지고
+는지는
+는지도
+는지라
+는지를
+는지만
+는지에
+는지요
+는지의
+니
+니까
+니까는
+니깐
+니라
+니만치
+니만큼
+다
+다가
+다가는
+다가도
+다간
+다거나
+다고
+다고까지
+다고까지는
+다고까지도
+다고까지라도
+다고까지만
+다고까지만은
+다고는
+다고도
+다고만
+다고만은
+다고요
+다곤
+다느냐
+다느니
+다는
+다는데
+다니
+다마는
+다마다
+다만
+다만은
+다며
+다며는
+다면
+다면서
+다면서도
+다면야
+다면은
+다시피
+다오
+단
+단다
+담
+답시고
+더구나
+더구려
+더구먼
+더군
+더군요
+더냐
+더니
+더니라
+더니마는
+더니만
+더라
+더라도
+더라며는
+더라면
+더란
+더면
+던
+던가
+던가요
+던걸
+던걸요
+던고
+던데
+던데다
+던데요
+던들
+던지
+데
+데도
+데요
+도록
+도록까지
+도록까지도
+도록까지만
+도록까지만요
+도록까지만은
+되
+든
+든지
+듯
+듯이
+디
+라
+라고
+라고까지
+라고까지는
+라고까지도
+라고까지만
+라고까지만은
+라고는
+라고도
+라고만
+라고만은
+라곤
+라느니
+라는
+라는데
+라는데도
+라는데요
+라니
+라니까
+라니까요
+라도
+라든지
+라며
+라면
+라면서
+라면서까지
+라면서까지도
+라면서도
+라면서요
+란
+란다
+란다고
+람
+랍니까
+랍니다
+랍디까
+랍디다
+랍시고
+래
+래도
+랴
+랴마는
+러
+러니
+러니라
+러니이까
+러니이다
+러만
+러만은
+러이까
+러이다
+런가
+런들
+려
+려거든
+려고
+려고까지
+려고까지도
+려고까지만
+려고까지만은
+려고는
+려고도
+려고만
+려고만은
+려고요
+려기에
+려나
+려네
+려느냐
+려는
+려는가
+려는데
+려는데요
+려는지
+려니
+려니까
+려니와
+려다
+려다가
+려다가는
+려다가도
+려다가요
+려더니
+려더니만
+려던
+려면
+려면요
+려면은
+려무나
+련
+련마는
+련만
+렴
+렷다
+리
+리까
+리니
+리니라
+리다
+리라
+리라는
+리란
+리로다
+리만치
+리만큼
+리요
+리요마는
+마
+매
+며
+며는
+면
+면서
+면서까지
+면서까지도
+면서까지만은
+면서도
+면서부터
+면서부터는
+면요
+면은
+므로
+사
+사오이다
+사옵니까
+사옵니다
+사옵디까
+사옵디다
+사외다
+세
+세요
+소
+소서
+소이다
+쇠다
+시오
+습니까
+습니다
+습니다마는
+습니다만
+습디까
+습디다
+습디다마는
+습디다만
+아
+아다
+아다가
+아도
+아라
+아서
+아서까지
+아서는
+아서도
+아서만
+아서요
+아선
+아야
+아야만
+아요
+어
+어다
+어다가
+어도
+어라
+어서
+어서까지
+어서는
+어서도
+어서만
+어서만은
+어선
+어야
+어야만
+어야지
+어야지만
+어요
+어지이다
+언정
+엇다
+오
+오리까
+오리까마는
+오리까만
+오리다
+오이다
+올습니다
+올습니다마는
+올습니다만
+올시다
+옵나이까
+옵나이다
+옵니까
+옵니다
+옵니다만
+옵디까
+옵디다
+외다
+요
+으나
+으나마
+으냐
+으냐고
+으니
+으니까
+으니까는
+으니깐
+으니라
+으니만치
+으니만큼
+으라
+으라고
+으라고까지
+으라고까지는
+으라고까지도
+으라고까지만은
+으라고는
+으라고도
+으라고만
+으라고만은
+으라고요
+으라느니
+으라는
+으라니
+으라니까
+으라든지
+으라며
+으라면
+으라면서
+으라면은
+으란
+으람
+으랍니까
+으랍니다
+으래
+으래서
+으래서야
+으래야
+으래요
+으랴
+으랴마는
+으러
+으러까지
+으러까지도
+으려
+으려거든
+으려고
+으려고까지
+으려고까지는
+으려고까지도
+으려고까지만
+으려고까지만은
+으려고는
+으려고도
+으려고만
+으려고만은
+으려고요
+으려기에
+으려나
+으려느냐
+으려느냐는
+으려는
+으려는가
+으려는데
+으려는데도
+으려는데요
+으려는지
+으려니
+으려니까
+으려니와
+으려다
+으려다가
+으려다가는
+으려다가요
+으려다간
+으려더니
+으려면
+으려면야
+으려면은
+으려무나
+으려서야
+으려오
+으련
+으련다
+으련마는
+으련만
+으련만은
+으렴
+으렵니까
+으렵니다
+으렷다
+으리
+으리까
+으리니
+으리니라
+으리다
+으리라
+으리로다
+으리만치
+으리만큼
+으리요
+으마
+으매
+으며
+으면
+으면서
+으면서까지
+으면서까지도
+으면서까지만
+으면서까지만은
+으면서는
+으면서도
+으면서부터
+으면서부터까지
+으면서부터까지도
+으면서부터는
+으면서요
+으면요
+으면은
+으므로
+으세요
+으셔요
+으소서
+으시어요
+으오
+으오리까
+으오리다
+으오이다
+으옵니까
+으옵니다
+으옵니다만
+으옵디까
+으옵디다
+으외다
+으이
+은
+은가
+은가를
+은가에
+은가에도
+은가에만
+은가요
+은걸
+은걸요
+은고
+은다
+은다고
+은다고까지
+은다고까지도
+은다고는
+은다는
+은다는데
+은다니
+은다니까
+은다든지
+은다마는
+은다며
+은다면
+은다면서
+은다면서도
+은다면요
+은다면은
+은단다
+은담
+은답니까
+은답니다
+은답디까
+은답디다
+은답시고
+은대
+은대서
+은대서야
+은대야
+은대요
+은데
+은데는
+은데다
+은데도
+은데도요
+은데서
+은들
+은만큼
+은만큼도
+은만큼만은
+은만큼은
+은바
+은즉
+은즉슨
+은지
+은지가
+은지고
+은지는
+은지도
+은지라
+은지라도
+은지를
+은지만
+은지만은
+은지요
+을
+을거나
+을거냐
+을거다
+을거야
+을거지요
+을걸
+을까
+을까마는
+을까봐
+을까요
+을께
+을께요
+을꼬
+을는지
+을는지요
+을라
+을라고
+을라고까지
+을라고까지도
+을라고까지만
+을라고는
+을라고도
+을라고만
+을라고만은
+을라고요
+을라요
+을라치면
+을락
+을래
+을래도
+을래요
+을러니
+을러라
+을런가
+을런고
+을레
+을레라
+을만한
+을망정
+을밖에
+을밖에요
+을뿐더러
+을새
+을세라
+을세말이지
+을소냐
+을수록
+을쏘냐
+을이만큼
+을작이면
+을지
+을지가
+을지나
+을지니
+을지니라
+을지도
+을지라
+을지라도
+을지어다
+을지언정
+을지요
+을진대
+을진댄
+을진저
+을테다
+을텐데
+음
+음세
+음에도
+음에랴
+읍쇼
+읍시다
+읍시다요
+읍시오
+이
+일까
+자
+자고
+자고까지
+자고까지는
+자고까지라도
+자고는
+자고도
+자고만
+자고만은
+자꾸나
+자는
+자마자
+자면
+자면요
+잔
+잘
+지
+지는
+지도
+지를
+지마는
+지만
+지요
+진
+질
\ No newline at end of file