You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2013/05/06 00:26:36 UTC
svn commit: r1479410 [5/10] - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang: ./
src/java/org/apache/lucene/analysis/kr/
src/java/org/apache/lucene/analysis/kr/morph/
src/java/org/apache/lucene/analysis/kr/tagging/ src/java/org/apache/lucen...
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java?rev=1479410&r1=1479409&r2=1479410&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/DictionaryUtil.java Sun May 5 22:26:35 2013
@@ -28,281 +28,280 @@ import org.apache.lucene.analysis.kr.mor
import org.apache.lucene.analysis.kr.morph.WordEntry;
public class DictionaryUtil {
-
- private static Trie<String,WordEntry> dictionary;
-
- private static HashMap josas;
-
- private static HashMap eomis;
-
- private static HashMap prefixs;
-
- private static HashMap suffixs;
-
- private static HashMap<String,WordEntry> uncompounds;
-
- private static HashMap<String, String> cjwords;
-
- /**
- * 사전을 로드한다.
- */
- public synchronized static void loadDictionary() throws MorphException {
-
- dictionary = new Trie<String, WordEntry>(true);
- List<String> strList = null;
- List<String> compounds = null;
- try {
- strList = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_DICTIONARY),"UTF-8");
- strList.addAll(FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_EXTENSION),"UTF-8"));
- compounds = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_COMPOUNDS),"UTF-8");
- } catch (IOException e) {
- new MorphException(e.getMessage(),e);
- } catch (Exception e) {
- new MorphException(e.getMessage(),e);
- }
- if(strList==null) throw new MorphException("dictionary is null");;
-
- for(String str:strList) {
- String[] infos = str.split("[,]+");
- if(infos.length!=2) continue;
- infos[1] = infos[1].trim();
- if(infos[1].length()==6) infos[1] = infos[1].substring(0,5)+"000"+infos[1].substring(5);
-
- WordEntry entry = new WordEntry(infos[0].trim(),infos[1].trim().toCharArray());
- dictionary.add(entry.getWord(), entry);
- }
-
- for(String compound: compounds) {
- String[] infos = compound.split("[:]+");
- if(infos.length!=2) continue;
- WordEntry entry = new WordEntry(infos[0].trim(),"20000000X".toCharArray());
- entry.setCompounds(compoundArrayToList(infos[1], infos[1].split("[,]+")));
- dictionary.add(entry.getWord(), entry);
- }
- }
-
- public static Iterator findWithPrefix(String prefix) throws MorphException {
- if(dictionary==null) loadDictionary();
- return dictionary.getPrefixedBy(prefix);
- }
-
- public static WordEntry getWord(String key) throws MorphException {
- if(dictionary==null) loadDictionary();
- if(key.length()==0) return null;
-
- return (WordEntry)dictionary.get(key);
- }
-
- public static WordEntry getWordExceptVerb(String key) throws MorphException {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_NOUN)=='1'||
- entry.getFeature(WordEntry.IDX_BUSA)=='1') return entry;
- return null;
- }
-
- public static WordEntry getNoun(String key) throws MorphException {
-
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_NOUN)=='1') return entry;
- return null;
- }
-
- public static WordEntry getCNoun(String key) throws MorphException {
-
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_NOUN)=='1' || entry.getFeature(WordEntry.IDX_NOUN)=='2') return entry;
- return null;
- }
-
- public static WordEntry getVerb(String key) throws MorphException {
-
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_VERB)=='1') {
- return entry;
- }
- return null;
- }
-
- public static WordEntry getAdverb(String key) throws MorphException {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_BUSA)=='1') return entry;
- return null;
- }
-
- public static WordEntry getBusa(String key) throws MorphException {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_BUSA)=='1'&&entry.getFeature(WordEntry.IDX_NOUN)=='0') return entry;
- return null;
- }
-
- public static WordEntry getIrrVerb(String key, char irrType) throws MorphException {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_VERB)=='1'&&
- entry.getFeature(WordEntry.IDX_REGURA)==irrType) return entry;
- return null;
- }
-
- public static WordEntry getBeVerb(String key) throws MorphException {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_BEV)=='1') return entry;
- return null;
- }
-
- public static WordEntry getDoVerb(String key) throws MorphException {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_DOV)=='1') return entry;
- return null;
- }
-
- public static WordEntry getUncompound(String key) throws MorphException {
-
- try {
- if(uncompounds==null) {
- uncompounds = new HashMap();
- List<String> lines = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_UNCOMPOUNDS),"UTF-8");
- for(String compound: lines) {
- String[] infos = compound.split("[:]+");
- if(infos.length!=2) continue;
- WordEntry entry = new WordEntry(infos[0].trim(),"90000X".toCharArray());
- entry.setCompounds(compoundArrayToList(infos[1], infos[1].split("[,]+")));
- uncompounds.put(entry.getWord(), entry);
- }
- }
- }catch(Exception e) {
- throw new MorphException(e);
- }
- return uncompounds.get(key);
- }
-
- public static String getCJWord(String key) throws MorphException {
-
- try {
- if(cjwords==null) {
- cjwords = new HashMap();
- List<String> lines = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_CJ),"UTF-8");
- for(String cj: lines) {
- String[] infos = cj.split("[:]+");
- if(infos.length!=2) continue;
- cjwords.put(infos[0], infos[1]);
- }
- }
- }catch(Exception e) {
- throw new MorphException(e);
- }
- return cjwords.get(key);
-
- }
-
- public static boolean existJosa(String str) throws MorphException {
- if(josas==null) {
- josas = new HashMap();
- readFile(josas,KoreanEnv.FILE_JOSA);
- }
- if(josas.get(str)==null) return false;
- else return true;
- }
-
- public static boolean existEomi(String str) throws MorphException {
- if(eomis==null) {
- eomis = new HashMap();
- readFile(eomis,KoreanEnv.FILE_EOMI);
- }
-
- if(eomis.get(str)==null) return false;
- else return true;
- }
-
- public static boolean existPrefix(String str) throws MorphException {
- if(prefixs==null) {
- prefixs = new HashMap();
- readFile(prefixs,KoreanEnv.FILE_PREFIX);
- }
-
- if(prefixs.get(str)==null) return false;
- else return true;
- }
-
- public static boolean existSuffix(String str) throws MorphException {
- if(suffixs==null) {
- suffixs = new HashMap();
- readFile(suffixs,KoreanEnv.FILE_SUFFIX);
- }
-
- if(suffixs.get(str)!=null) return true;
-
- return false;
- }
-
- /**
- * ㄴ,ㄹ,ㅁ,ㅂ과 eomi 가 결합하여 어미가 될 수 있는지 점검한다.
- * @param s
- * @param end
- * @return
- */
- public static String combineAndEomiCheck(char s, String eomi) throws MorphException {
-
- if(eomi==null) eomi="";
-
- if(s=='ã´') eomi = "ì"+eomi;
- else if(s=='ã¹') eomi = "ì"+eomi;
- else if(s=='ã
') eomi = "ì"+eomi;
- else if(s=='ã
') eomi = "ìµ"+eomi;
- else eomi = s+eomi;
-
- if(existEomi(eomi)) return eomi;
-
- return null;
-
- }
-
- /**
- *
- * @param map
- * @param type 1: josa, 2: eomi
- * @throws MorphException
- */
- private static synchronized void readFile(HashMap map, String dic) throws MorphException {
-
- String path = KoreanEnv.getInstance().getValue(dic);
-
- try{
- List<String> line = FileUtil.readLines(path,"UTF-8");
- for(int i=1;i<line.size();i++) {
- map.put(line.get(i).trim(), line.get(i));
- }
- }catch(IOException e) {
- throw new MorphException(e.getMessage(),e);
- } catch (Exception e) {
- throw new MorphException(e.getMessage(),e);
- }
- }
-
- private static List compoundArrayToList(String source, String[] arr) {
- List list = new ArrayList();
- for(String str: arr) {
- CompoundEntry ce = new CompoundEntry(str);
- ce.setOffset(source.indexOf(str));
- list.add(ce);
- }
- return list;
- }
+
+ // Main word dictionary keyed by word; stays null until loadDictionary() has run.
+ private static Trie<String,WordEntry> dictionary;
+
+ // Lookup table filled on first use by existJosa() from the KoreanEnv.FILE_JOSA resource.
+ private static HashMap josas;
+
+ // Lookup table filled on first use by existEomi() from the KoreanEnv.FILE_EOMI resource.
+ private static HashMap eomis;
+
+ // Lookup table filled on first use by existPrefix() from the KoreanEnv.FILE_PREFIX resource.
+ private static HashMap prefixs;
+
+ // Lookup table filled on first use by existSuffix() from the KoreanEnv.FILE_SUFFIX resource.
+ private static HashMap suffixs;
+
+ // Word -> entry table filled on first use by getUncompound() from KoreanEnv.FILE_UNCOMPOUNDS.
+ private static HashMap<String,WordEntry> uncompounds;
+
+ // Key -> value table filled on first use by getCJWord() from KoreanEnv.FILE_CJ.
+ private static HashMap<String, String> cjwords;
+
+ /**
+  * Loads the main dictionary, the extension dictionary and the compound list,
+  * and installs the result as the shared word trie.
+  * <p>
+  * Dictionary lines have the form {@code word,features}; lines that do not split
+  * into exactly two comma-separated fields are skipped. A 6-character feature
+  * string is widened to 9 characters by inserting {@code "000"} after its fifth
+  * character. Compound lines have the form {@code word:part1,part2,...} and are
+  * registered with the fixed feature string {@code "20000000X"}.
+  * <p>
+  * The trie is built locally and assigned to the static field only after every
+  * resource has been read, so a failed load never leaves a partially filled
+  * trie cached.
+  *
+  * @throws MorphException if a dictionary resource cannot be read or the main
+  *         dictionary is null
+  */
+ public synchronized static void loadDictionary() throws MorphException {
+
+   List<String> strList = null;
+   List<String> compounds = null;
+   try {
+     strList = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_DICTIONARY),"UTF-8");
+     if(strList!=null) {
+       strList.addAll(FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_EXTENSION),"UTF-8"));
+     }
+     compounds = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_COMPOUNDS),"UTF-8");
+   } catch (Exception e) {
+     // Previously the exception was constructed but never thrown, which hid the
+     // real failure and surfaced later as a NullPointerException.
+     throw new MorphException(e.getMessage(),e);
+   }
+   if(strList==null) throw new MorphException("dictionary is null");
+
+   Trie<String, WordEntry> loaded = new Trie<String, WordEntry>(true);
+
+   for(String str:strList) {
+     String[] infos = str.split("[,]+");
+     if(infos.length!=2) continue;
+     infos[1] = infos[1].trim();
+     // Widen the short 6-character feature format to the 9-character one.
+     if(infos[1].length()==6) infos[1] = infos[1].substring(0,5)+"000"+infos[1].substring(5);
+
+     WordEntry entry = new WordEntry(infos[0].trim(),infos[1].trim().toCharArray());
+     loaded.add(entry.getWord(), entry);
+   }
+
+   if(compounds!=null) {
+     for(String compound: compounds) {
+       String[] infos = compound.split("[:]+");
+       if(infos.length!=2) continue;
+       WordEntry entry = new WordEntry(infos[0].trim(),"20000000X".toCharArray());
+       entry.setCompounds(compoundArrayToList(infos[1], infos[1].split("[,]+")));
+       loaded.add(entry.getWord(), entry);
+     }
+   }
+
+   // Install only the fully built trie.
+   dictionary = loaded;
+ }
+
+ public static Iterator findWithPrefix(String prefix) throws MorphException {
+ if(dictionary==null) loadDictionary();
+ return dictionary.getPrefixedBy(prefix);
+ }
+
+ /**
+  * Looks up a word in the dictionary, loading the dictionary on first use.
+  *
+  * @param key word to look up; an empty string yields null
+  * @return the entry stored under {@code key}, or null
+  * @throws MorphException if the dictionary cannot be loaded
+  */
+ public static WordEntry getWord(String key) throws MorphException {
+   if (dictionary == null) {
+     loadDictionary();
+   }
+   return key.length() == 0 ? null : (WordEntry) dictionary.get(key);
+ }
+
+ public static WordEntry getWordExceptVerb(String key) throws MorphException {
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_NOUN)=='1'||
+ entry.getFeature(WordEntry.IDX_BUSA)=='1') return entry;
+ return null;
+ }
+
+ public static WordEntry getNoun(String key) throws MorphException {
+
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_NOUN)=='1') return entry;
+ return null;
+ }
+
+ public static WordEntry getCNoun(String key) throws MorphException {
+
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_NOUN)=='1' || entry.getFeature(WordEntry.IDX_NOUN)=='2') return entry;
+ return null;
+ }
+
+ public static WordEntry getVerb(String key) throws MorphException {
+
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_VERB)=='1') {
+ return entry;
+ }
+ return null;
+ }
+
+ public static WordEntry getAdverb(String key) throws MorphException {
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_BUSA)=='1') return entry;
+ return null;
+ }
+
+ public static WordEntry getBusa(String key) throws MorphException {
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_BUSA)=='1'&&entry.getFeature(WordEntry.IDX_NOUN)=='0') return entry;
+ return null;
+ }
+
+ public static WordEntry getIrrVerb(String key, char irrType) throws MorphException {
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_VERB)=='1'&&
+ entry.getFeature(WordEntry.IDX_REGURA)==irrType) return entry;
+ return null;
+ }
+
+ public static WordEntry getBeVerb(String key) throws MorphException {
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_BEV)=='1') return entry;
+ return null;
+ }
+
+ public static WordEntry getDoVerb(String key) throws MorphException {
+ WordEntry entry = getWord(key);
+ if(entry==null) return null;
+
+ if(entry.getFeature(WordEntry.IDX_DOV)=='1') return entry;
+ return null;
+ }
+
+ /**
+  * Looks up {@code key} in the table read from KoreanEnv.FILE_UNCOMPOUNDS,
+  * reading that resource on first use.
+  * <p>
+  * Resource lines have the form {@code word:part1,part2,...}; lines that do not
+  * split into exactly two colon-separated fields are skipped. Each entry gets
+  * the fixed feature string {@code "90000X"}.
+  * <p>
+  * The table is built locally and assigned to the static field only after the
+  * resource has been read completely. Previously the empty map was assigned
+  * first, so a failed read left an empty table cached and every later call
+  * silently returned null.
+  *
+  * @param key word to look up
+  * @return the stored entry, or null when the word is not listed
+  * @throws MorphException if the resource cannot be read
+  */
+ public static WordEntry getUncompound(String key) throws MorphException {
+
+   if(uncompounds==null) {
+     HashMap<String,WordEntry> loaded = new HashMap<String,WordEntry>();
+     try {
+       List<String> lines = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_UNCOMPOUNDS),"UTF-8");
+       for(String compound: lines) {
+         String[] infos = compound.split("[:]+");
+         if(infos.length!=2) continue;
+         // NOTE(review): 6-character feature string here vs. 9 characters in loadDictionary() - confirm intended.
+         WordEntry entry = new WordEntry(infos[0].trim(),"90000X".toCharArray());
+         entry.setCompounds(compoundArrayToList(infos[1], infos[1].split("[,]+")));
+         loaded.put(entry.getWord(), entry);
+       }
+     }catch(Exception e) {
+       throw new MorphException(e);
+     }
+     uncompounds = loaded;
+   }
+   return uncompounds.get(key);
+ }
+
+ public static String getCJWord(String key) throws MorphException {
+
+ try {
+ if(cjwords==null) {
+ cjwords = new HashMap();
+ List<String> lines = FileUtil.readLines(KoreanEnv.getInstance().getValue(KoreanEnv.FILE_CJ),"UTF-8");
+ for(String cj: lines) {
+ String[] infos = cj.split("[:]+");
+ if(infos.length!=2) continue;
+ cjwords.put(infos[0], infos[1]);
+ }
+ }
+ }catch(Exception e) {
+ throw new MorphException(e);
+ }
+ return cjwords.get(key);
+
+ }
+
+ public static boolean existJosa(String str) throws MorphException {
+ if(josas==null) {
+ josas = new HashMap();
+ readFile(josas,KoreanEnv.FILE_JOSA);
+ }
+ if(josas.get(str)==null) return false;
+ else return true;
+ }
+
+ public static boolean existEomi(String str) throws MorphException {
+ if(eomis==null) {
+ eomis = new HashMap();
+ readFile(eomis,KoreanEnv.FILE_EOMI);
+ }
+
+ if(eomis.get(str)==null) return false;
+ else return true;
+ }
+
+ public static boolean existPrefix(String str) throws MorphException {
+ if(prefixs==null) {
+ prefixs = new HashMap();
+ readFile(prefixs,KoreanEnv.FILE_PREFIX);
+ }
+
+ if(prefixs.get(str)==null) return false;
+ else return true;
+ }
+
+ public static boolean existSuffix(String str) throws MorphException {
+ if(suffixs==null) {
+ suffixs = new HashMap();
+ readFile(suffixs,KoreanEnv.FILE_SUFFIX);
+ }
+
+ if(suffixs.get(str)!=null) return true;
+
+ return false;
+ }
+
+ /**
+  * Combines a jamo with an ending and checks whether the result is a listed
+  * ending (eomi).
+  * <p>
+  * The four compatibility jamo NIEUN, RIEUL, MIEUM and PIEUP are replaced by the
+  * syllables EUN, EUL, EUM and SEUP respectively before being prepended; any
+  * other character is prepended unchanged.
+  * <p>
+  * The Hangul literals are written as Unicode escapes because the previous
+  * literals had been corrupted by a charset mis-decoding (multi-character char
+  * literals, one of them split across lines).
+  *
+  * @param s character to combine with the ending
+  * @param eomi ending to extend; null is treated as the empty string
+  * @return the combined ending when existEomi() accepts it, otherwise null
+  * @throws MorphException if the ending table cannot be read
+  */
+ public static String combineAndEomiCheck(char s, String eomi) throws MorphException {
+
+   if(eomi==null) eomi="";
+
+   if(s=='\u3134') eomi = "\uC740"+eomi;       // NIEUN (ㄴ) -> EUN (은)
+   else if(s=='\u3139') eomi = "\uC744"+eomi;  // RIEUL (ㄹ) -> EUL (을)
+   else if(s=='\u3141') eomi = "\uC74C"+eomi;  // MIEUM (ㅁ) -> EUM (음)
+   else if(s=='\u3142') eomi = "\uC2B5"+eomi;  // PIEUP (ㅂ) -> SEUP (습)
+   else eomi = s+eomi;
+
+   if(existEomi(eomi)) return eomi;
+
+   return null;
+
+ }
+
+ /**
+  * Fills {@code map} from the resource whose path is stored under the
+  * KoreanEnv property {@code dic}.
+  * <p>
+  * Every line except the first is stored with its trimmed text as key and the
+  * untrimmed line as value.
+  * NOTE(review): the first line is skipped, presumably a header line - confirm
+  * against the resource files.
+  *
+  * @param map table to fill
+  * @param dic KoreanEnv property key naming the resource
+  * @throws MorphException wrapping any failure while reading the resource
+  */
+ private static synchronized void readFile(HashMap map, String dic) throws MorphException {
+
+   String path = KoreanEnv.getInstance().getValue(dic);
+
+   try {
+     List<String> rows = FileUtil.readLines(path,"UTF-8");
+     int position = 0;
+     for (String raw : rows) {
+       // Index 0 is never stored.
+       if (position++ > 0) {
+         map.put(raw.trim(), raw);
+       }
+     }
+   } catch (Exception e) {
+     throw new MorphException(e.getMessage(),e);
+   }
+ }
+
+ /**
+  * Wraps each element of {@code arr} in a CompoundEntry whose offset is the
+  * index of that element's first occurrence in {@code source}.
+  * NOTE(review): indexOf finds the first occurrence only, so a part that
+  * repeats (or is a substring of an earlier part) gets the earlier offset -
+  * confirm this is acceptable.
+  *
+  * @param source text the parts were split from
+  * @param arr parts to wrap, in order
+  * @return a new list of CompoundEntry objects in the order of {@code arr}
+  */
+ private static List compoundArrayToList(String source, String[] arr) {
+   List entries = new ArrayList();
+   for (int i = 0; i < arr.length; i++) {
+     String part = arr[i];
+     CompoundEntry wrapped = new CompoundEntry(part);
+     wrapped.setOffset(source.indexOf(part));
+     entries.add(wrapped);
+   }
+   return entries;
+ }
}
-
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java?rev=1479410&r1=1479409&r2=1479410&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/kr/utils/EomiUtil.java Sun May 5 22:26:35 2013
@@ -25,511 +25,507 @@ import org.apache.lucene.analysis.kr.mor
import org.apache.lucene.analysis.kr.morph.PatternConstants;
public class EomiUtil {
-
-
- public static final String RESULT_FAIL = "0";
-
- public static final String RESULT_SUCCESS = "1";
-
- public static final String[] verbSuffix = {
- "ì´","í","ë","ì¤ë½","ì¤ë¬ì°","ìí¤","ì","ì","ê°","ë¹í","ë§í","ë리","ë°","ë","ë´"
- };
-
- /**
- * 가장 길이가 긴 어미를 분리한다.
- * @param term
- * @return
- * @throws MorphException
- */
- public static String[] longestEomi(String term) throws MorphException {
-
- String[] result = new String[2];
- result[0] = term;
-
- String stem;
- String eomi;
- char[] efeature;
-
- for(int i=term.length();i>0;i--) {
-
- stem = term.substring(0,i);
-
- if(i!=term.length()) {
- eomi = term.substring(i);
- efeature = SyllableUtil.getFeature(eomi.charAt(0));
- } else {
- efeature = SyllableUtil.getFeature(stem.charAt(i-1));
- eomi="";
- }
-
- if(SyllableUtil.isAlpanumeric(stem.charAt(i-1))) break;
-
- char[] jasos = MorphUtil.decompose(stem.charAt(i-1));
-
- if(!"".equals(eomi)&&!DictionaryUtil.existEomi(eomi)) {
- // do not anything.
- } else if(jasos.length>2&&
- (jasos[2]=='ã´'||jasos[2]=='ã¹'||jasos[2]=='ã
'||jasos[2]=='ã
')&&
- DictionaryUtil.combineAndEomiCheck(jasos[2], eomi)!=null) {
- result[0] = Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 0));
- if(i!=0) result[0] = stem.substring(0,i-1)+result[0];
- result[1] = Character.toString(jasos[2]);
- }else if(i>0&&(stem.endsWith("í")&&"ì¬".equals(eomi))||
- (stem.endsWith("ê°")&&"ê±°ë¼".equals(eomi))||
- (stem.endsWith("ì¤")&&"ëë¼".equals(eomi))) {
- result[0] = stem;
- result[1] = eomi;
- }else if(jasos.length==2&&(!stem.endsWith("ì")&&!stem.endsWith("ì´"))&&
- (jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
')&&
- (DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {
- char[] chs = MorphUtil.decompose(stem.charAt(stem.length()-1));
- result[0] = stem;
- result[1] = "ì´"+eomi;
- }else if((jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
')&&
- (DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {
- String end = "";
- if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 8, 0)+"ì";
- else if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 13, 0)+"ì´";
- else if(jasos[1]=='ã
')
- end=Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 6, 0));
- else if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 0, 0)+"ì´";
- else if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 20, 0)+"ì ";
-
- if(jasos.length==3) {
- end = end.substring(0,end.length()-1)+MorphUtil.replaceJongsung(end.charAt(end.length()-1),stem.charAt(i-1));
- }
-
- if(stem.length()<2) result[0] = end;
- else result[0] = stem.substring(0,stem.length()-1)+end;
- result[1] = eomi;
-
- }else if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI1]!='0'&&
- DictionaryUtil.existEomi(eomi)) {
- if(!(((jasos.length==2&&jasos[0]=='ã¹')||(jasos.length==3&&jasos[2]=='ã¹'))&&eomi.equals("ë¬"))) { // ã¹ ë¶ê·ì¹ì ìì¸
- result[0] = stem;
- result[1] = eomi;
- }
- }
-
- if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI2]=='0') break;
- }
-
- return result;
-
- }
-
- /**
- * 선어말어미를 분석한다.
- * @param stem
- * @return
- */
- public static String[] splitPomi(String stem) throws MorphException {
-
- // results[0]:ì±ê³µ(1)/ì¤í¨(0), results[1]: ì´ê·¼, results[2]: ì ì´ë§ì´ë¯¸
- String[] results = new String[2];
- results[0] = stem;
-
- if(stem==null||stem.length()==0||"ì".equals(stem)) return results;
-
- char[] chrs = stem.toCharArray();
- int len = chrs.length;
- String pomi = "";
- int index = len-1;
-
- char[] jaso = MorphUtil.decompose(chrs[index]);
- if(chrs[index]!='ì'&&chrs[index]!='ã
'&&jaso[jaso.length-1]!='ã
') return results; // ì ì´ë§ì´ë¯¸ê° ë°ê²¬ëì§ ììë¤
-
- if(chrs[index]=='ê² ') {
- pomi = "ê² ";
- setPomiResult(results,stem.substring(0,index),pomi);
- if(--index<=0||
- (chrs[index]!='ì'&&chrs[index]!='ã
'&&jaso[jaso.length-1]!='ã
'))
- return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
- jaso = MorphUtil.decompose(chrs[index]);
- }
-
- if(chrs[index]=='ì') { // ìì, ã
ì, ì
- pomi = chrs[index]+pomi;
- setPomiResult(results,stem.substring(0,index),pomi);
- if(--index<=0||
- (chrs[index]!='ì'&&chrs[index]!='ã
'&&jaso[jaso.length-1]!='ã
'))
- return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
- jaso = MorphUtil.decompose(chrs[index]);
- }
-
- if(chrs[index]=='ì'){
- pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
- if(index>0&&chrs[index-1]=='í')
- stem = stem.substring(0,index);
- else
- stem = stem.substring(0,index)+"ì´";
- setPomiResult(results,stem,pomi);
- }else if(chrs[index]=='ì
¨'){
- pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
- stem = stem.substring(0,index);
- setPomiResult(results,stem,"ì"+pomi);
- }else if(chrs[index]=='ì'||chrs[index]=='ì') {
- pomi = chrs[index]+pomi;
- setPomiResult(results,stem.substring(0,index),pomi);
- if(--index<=0||
- (chrs[index]!='ì'&&chrs[index]!='ì¼')) return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
- jaso = MorphUtil.decompose(chrs[index]);
- }else if(jaso.length==3&&jaso[2]=='ã
') {
-
- if(jaso[0]=='ã
'&&jaso[1]=='ã
') {
- pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
- stem = stem.substring(0,index)+"í";
- }else if(jaso[0]!='ã
'&&(jaso[1]=='ã
'||jaso[1]=='ã
'||jaso[1]=='ã
'||jaso[1]=='ã
')) {
- pomi = "ì"+pomi;
- stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index], 0);
- }else if(jaso[0]!='ã
'&&(jaso[1]=='ã
')) {
- pomi = "ì"+pomi;
- stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],11, 0);
- } else if(jaso[1]=='ã
') {
- pomi = MorphUtil.replaceJongsung('ì',chrs[index])+pomi;
- stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],8, 0);
- } else if(jaso[1]=='ã
') {
- pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
- stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],13, 0);
- } else if(jaso[1]=='ã
') {
- pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
- stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],20, 0);
- } else if(jaso[1]=='ã
') {
- pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
- stem = stem.substring(0,index);
- } else if(jaso[1]=='ã
') {
- pomi = MorphUtil.replaceJongsung('ì ',chrs[index])+pomi;
- stem = stem.substring(0,index);
- } else {
- pomi = "ì"+pomi;
- }
- setPomiResult(results,stem,pomi);
- if(chrs[index]!='ì'&&chrs[index]!='ì¼') return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
- jaso = MorphUtil.decompose(chrs[index]);
- }
-
- char[] nChrs = null;
- if(index>0) nChrs = MorphUtil.decompose(chrs[index-1]);
- else nChrs = new char[2];
-
- if(nChrs.length==2&&chrs[index]=='ì'&&(chrs.length<=index+1||
- (chrs.length>index+1&&chrs[index+1]!='ì
¨'))) {
- if(DictionaryUtil.getWord(results[0])!=null) return results; //'ì'ê° í¬í¨ë ë¨ì´ê° ìë¤. ì±ê°ìë¤/ëìë¤/ë¤ì¤ìë¤
- pomi = chrs[index]+pomi;
- setPomiResult(results,stem.substring(0,index),pomi);
- if(--index==0||chrs[index]!='ì¼') return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
- jaso = MorphUtil.decompose(chrs[index]);
- }
-
- if(index>0) nChrs = MorphUtil.decompose(chrs[index-1]);
- else nChrs = new char[2];
- if(chrs.length>index+1&&nChrs.length==3&&(chrs[index+1]=='ì
¨'||chrs[index+1]=='ì')&&chrs[index]=='ì¼') {
- pomi = chrs[index]+pomi;
- setPomiResult(results,stem.substring(0,index),pomi);
- }
-
- return results;
- }
-
- /**
- * 불규칙 용언의 원형을 구한다.
- * @param output
- * @return
- * @throws MorphException
- */
- public static List irregular(AnalysisOutput output) throws MorphException {
-
- List results = new ArrayList();
-
- if(output.getStem()==null||output.getStem().length()==0)
- return results;
-
- String ending = output.getEomi();
- if(output.getPomi()!=null) ending = output.getPomi();
-
- List<String[]> irrs = new ArrayList();
-
- irregularStem(irrs,output.getStem(),ending);
- irregularEnding(irrs,output.getStem(),ending);
- irregularAO(irrs,output.getStem(),ending);
-
- try {
- for(String[] irr: irrs) {
- AnalysisOutput result = output.clone();
- result.setStem(irr[0]);
- if(output.getPatn()==PatternConstants.PTN_VM) {
- if(output.getPomi()==null) result.setEomi(irr[1]);
- else result.setPomi(irr[1]);
- }
- results.add(result);
- }
- } catch (CloneNotSupportedException e) {
- throw new MorphException(e.getMessage(),e);
- }
-
- return results;
-
- }
-
- /**
- * 어간만 변하는 경우
- * @param results
- * @param stem
- * @param ending
- */
- private static void irregularStem(List results, String stem, String ending) {
-
- char feCh = ending.charAt(0);
- char[] fechJaso = MorphUtil.decompose(feCh);
- char ls = stem.charAt(stem.length()-1);
- char[] lsJaso = MorphUtil.decompose(ls);
-
- if(feCh=='ì'||feCh=='ì´'||feCh=='ì¼') {
- if(lsJaso[lsJaso.length-1]=='ã¹') { // ã· ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),7)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_DI)});
- } else if(lsJaso.length==2) { // ã
ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),19)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_SI)});
- }
- }
-
- if((fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã
'|| feCh=='ì¤'||feCh=='ì')
- &&(ls=='ì°')) { // ã
ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),17)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
- }
-
- if((fechJaso[0]=='ã´'||fechJaso[0]=='ã
'||fechJaso[0]=='ã
'|| feCh=='ì¤')
- &&(lsJaso.length==2)) { // ã¹ íë½
-
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),8)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_LI)});
- }
-
- if(lsJaso.length==2
- &&(fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã
'||fechJaso[0]=='ã
'||
- lsJaso[1]=='ã
'||lsJaso[1]=='ã
'||lsJaso[1]=='ã
'||lsJaso[1]=='ã
')
- &&!"ë".equals(stem)) { // ã
ë¶ê·ì¹, ê·¸ë¬ë [ë³ë¤]ë ã
ë¶ê·ì¹ì´ ìëë¤.
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),27)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
- }
- }
-
- /**
- * 어미만 변하는 경우
- * @param results
- * @param stem
- * @param ending
- */
- private static void irregularEnding(List results, String stem, String ending) {
- if(ending.startsWith("ã
")) return;
-
- char feCh = ending.charAt(0);
- char ls = stem.charAt(stem.length()-1);
-
- if(feCh=='ë¬'&&ls=='르') { // 'ë¬' ë¶ê·ì¹
- results.add(
- new String[]{stem
- ,"ì´"+ending.substring(1)
- ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
- } else if("ë¼".equals(ending)&&"ê°ê±°".equals(stem)) { // 'ê±°ë¼' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)
- ,"ì´ë¼"
- ,String.valueOf(PatternConstants.IRR_TYPE_GU)});
- } else if("ë¼".equals(ending)&&"ì¤ë".equals(stem)) { // 'ëë¼' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)
- ,"ì´ë¼"
- ,String.valueOf(PatternConstants.IRR_TYPE_NU)});
- }
-
- if("ì¬".equals(ending)&&ls=='í') { // 'ì¬' ë¶ê·ì¹
- results.add(
- new String[]{stem
- ,"ì´"
- ,String.valueOf(PatternConstants.IRR_TYPE_NU)});
- }
- }
-
- /**
- * 어간과 어미가 모두 변하는 경우
- * @param results
- * @param stem
- * @param ending
- */
- private static void irregularAO(List results, String stem, String ending) {
-
- char ls = stem.charAt(stem.length()-1);
- char[] lsJaso = MorphUtil.decompose(ls);
-
- if(lsJaso.length<2) return;
-
- if(lsJaso[1]=='ã
') {
- if(stem.endsWith("ëì")||stem.endsWith("ê³ ì")) { // 'ê³±ë¤', 'ëë¤'ì 'ã
' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã
'
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
- }else { // 'ì' ì¶ì½
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),8,0) // ìì + ã
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
- }
- } else if(stem.endsWith("í¼")) {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0) // ìì + -
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
- } else if(lsJaso[1]=='ã
') {
- if(stem.length()>=2) // 'ã
' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã
'
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
-
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),13,0) // ìì + ã
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
- } else if(stem.length()>=2&&ls=='ë¼') {
- char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
- if(ns.length==3&&ns[2]=='ã¹') { // 르 ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
- }
- } else if(stem.length()>=2&&ls=='ë¬') {
- char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
- if(stem.charAt(stem.length()-2)=='르') { // ë¬ ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_LO)});
- } else if(ns.length==3&&ns[2]=='ã¹') { // 르 ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
- }
- } else if(stem.endsWith("í´")||stem.endsWith("ì¼")) {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
- } else if(stem.endsWith("í´")) {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),0,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
- } else if(lsJaso.length==2&&lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_UO)});
- } else if(lsJaso.length==2&&lsJaso[1]=='ã
') {
- // ì¼ íë½
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_UO)});
- // ì ë¶ê·ì¹
- results.add(
- new String[]{stem
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_AH)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),11,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_OE)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),0,27)
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),2,27)
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
- }
-
- }
-
- /**
- * ìì ì ì´ë¯¸ë§ì ë§ë¤ì´ì ë°ííë¤.
- * @param preword 'ì' ëë 'ì´'
- * @param endword ì´ë¯¸[ì ì´ë¯¸ë§ì í¬í¨]
- * @return 'ì' ëë 'ì'ì ë§ë¤ì´ì ë°ííë¤.
- */
- public static String makeTesnseEomi(String preword, String endword) {
-
- if(preword==null||preword.length()==0) return endword;
- if(endword==null||endword.length()==0) return preword;
-
- if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),20)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã´') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),4)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã¹') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),8)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),16)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),17)+endword.substring(1,endword.length());
- }
- return preword+endword;
- }
-
-
+
+ public static final String RESULT_FAIL = "0";
+
+ public static final String RESULT_SUCCESS = "1";
+
+ public static final String[] verbSuffix = {
+ "ì´","í","ë","ì¤ë½","ì¤ë¬ì°","ìí¤","ì","ì","ê°","ë¹í","ë§í","ë리","ë°","ë","ë´"
+ };
+
+ /**
+ * ê°ì¥ 길ì´ê° 긴 ì´ë¯¸ë¥¼ ë¶ë¦¬íë¤.
+ * @param term
+ * @return
+ * @throws MorphException
+ */
+ public static String[] longestEomi(String term) throws MorphException {
+
+ String[] result = new String[2];
+ result[0] = term;
+
+ String stem;
+ String eomi;
+ char[] efeature;
+
+ for(int i=term.length();i>0;i--) {
+
+ stem = term.substring(0,i);
+
+ if(i!=term.length()) {
+ eomi = term.substring(i);
+ efeature = SyllableUtil.getFeature(eomi.charAt(0));
+ } else {
+ efeature = SyllableUtil.getFeature(stem.charAt(i-1));
+ eomi="";
+ }
+
+ if(SyllableUtil.isAlpanumeric(stem.charAt(i-1))) break;
+
+ char[] jasos = MorphUtil.decompose(stem.charAt(i-1));
+
+ if(!"".equals(eomi)&&!DictionaryUtil.existEomi(eomi)) {
+ // do not anything.
+ } else if(jasos.length>2&&
+ (jasos[2]=='ã´'||jasos[2]=='ã¹'||jasos[2]=='ã
'||jasos[2]=='ã
')&&
+ DictionaryUtil.combineAndEomiCheck(jasos[2], eomi)!=null) {
+ result[0] = Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 0));
+ if(i!=0) result[0] = stem.substring(0,i-1)+result[0];
+ result[1] = Character.toString(jasos[2]);
+ }else if(i>0&&(stem.endsWith("í")&&"ì¬".equals(eomi))||
+ (stem.endsWith("ê°")&&"ê±°ë¼".equals(eomi))||
+ (stem.endsWith("ì¤")&&"ëë¼".equals(eomi))) {
+ result[0] = stem;
+ result[1] = eomi;
+ }else if(jasos.length==2&&(!stem.endsWith("ì")&&!stem.endsWith("ì´"))&&
+ (jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
')&&
+ (DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {
+ char[] chs = MorphUtil.decompose(stem.charAt(stem.length()-1));
+ result[0] = stem;
+ result[1] = "ì´"+eomi;
+ }else if((jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
')&&
+ (DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {
+ String end = "";
+ if(jasos[1]=='ã
')
+ end=MorphUtil.makeChar(stem.charAt(i-1), 8, 0)+"ì";
+ else if(jasos[1]=='ã
')
+ end=MorphUtil.makeChar(stem.charAt(i-1), 13, 0)+"ì´";
+ else if(jasos[1]=='ã
')
+ end=Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 6, 0));
+ else if(jasos[1]=='ã
')
+ end=MorphUtil.makeChar(stem.charAt(i-1), 0, 0)+"ì´";
+ else if(jasos[1]=='ã
')
+ end=MorphUtil.makeChar(stem.charAt(i-1), 20, 0)+"ì ";
+
+ if(jasos.length==3) {
+ end = end.substring(0,end.length()-1)+MorphUtil.replaceJongsung(end.charAt(end.length()-1),stem.charAt(i-1));
+ }
+
+ if(stem.length()<2) result[0] = end;
+ else result[0] = stem.substring(0,stem.length()-1)+end;
+ result[1] = eomi;
+
+ }else if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI1]!='0'&&
+ DictionaryUtil.existEomi(eomi)) {
+ if(!(((jasos.length==2&&jasos[0]=='ã¹')||(jasos.length==3&&jasos[2]=='ã¹'))&&eomi.equals("ë¬"))) { // ã¹ ë¶ê·ì¹ì ìì¸
+ result[0] = stem;
+ result[1] = eomi;
+ }
+ }
+
+ if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI2]=='0') break;
+ }
+
+ return result;
+
+ }
+
+ /**
+ * ì ì´ë§ì´ë¯¸ë¥¼ ë¶ìíë¤.
+ * @param stem
+ * @return
+ */
+ public static String[] splitPomi(String stem) throws MorphException {
+
+ // results[0]:ì±ê³µ(1)/ì¤í¨(0), results[1]: ì´ê·¼, results[2]: ì ì´ë§ì´ë¯¸
+ String[] results = new String[2];
+ results[0] = stem;
+
+ if(stem==null||stem.length()==0||"ì".equals(stem)) return results;
+
+ char[] chrs = stem.toCharArray();
+ int len = chrs.length;
+ String pomi = "";
+ int index = len-1;
+
+ char[] jaso = MorphUtil.decompose(chrs[index]);
+ if(chrs[index]!='ì'&&chrs[index]!='ã
'&&jaso[jaso.length-1]!='ã
') return results; // ì ì´ë§ì´ë¯¸ê° ë°ê²¬ëì§ ììë¤
+
+ if(chrs[index]=='ê² ') {
+ pomi = "ê² ";
+ setPomiResult(results,stem.substring(0,index),pomi);
+ if(--index<=0||
+ (chrs[index]!='ì'&&chrs[index]!='ã
'&&jaso[jaso.length-1]!='ã
'))
+ return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
+ jaso = MorphUtil.decompose(chrs[index]);
+ }
+
+ if(chrs[index]=='ì') { // ìì, ã
ì, ì
+ pomi = chrs[index]+pomi;
+ setPomiResult(results,stem.substring(0,index),pomi);
+ if(--index<=0||
+ (chrs[index]!='ì'&&chrs[index]!='ã
'&&jaso[jaso.length-1]!='ã
'))
+ return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
+ jaso = MorphUtil.decompose(chrs[index]);
+ }
+
+ if(chrs[index]=='ì'){
+ pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
+ if(index>0&&chrs[index-1]=='í')
+ stem = stem.substring(0,index);
+ else
+ stem = stem.substring(0,index)+"ì´";
+ setPomiResult(results,stem,pomi);
+ }else if(chrs[index]=='ì
¨'){
+ pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
+ stem = stem.substring(0,index);
+ setPomiResult(results,stem,"ì"+pomi);
+ }else if(chrs[index]=='ì'||chrs[index]=='ì') {
+ pomi = chrs[index]+pomi;
+ setPomiResult(results,stem.substring(0,index),pomi);
+ if(--index<=0||
+ (chrs[index]!='ì'&&chrs[index]!='ì¼')) return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
+ jaso = MorphUtil.decompose(chrs[index]);
+ }else if(jaso.length==3&&jaso[2]=='ã
') {
+
+ if(jaso[0]=='ã
'&&jaso[1]=='ã
') {
+ pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
+ stem = stem.substring(0,index)+"í";
+ }else if(jaso[0]!='ã
'&&(jaso[1]=='ã
'||jaso[1]=='ã
'||jaso[1]=='ã
'||jaso[1]=='ã
')) {
+ pomi = "ì"+pomi;
+ stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index], 0);
+ }else if(jaso[0]!='ã
'&&(jaso[1]=='ã
')) {
+ pomi = "ì"+pomi;
+ stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],11, 0);
+ } else if(jaso[1]=='ã
') {
+ pomi = MorphUtil.replaceJongsung('ì',chrs[index])+pomi;
+ stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],8, 0);
+ } else if(jaso[1]=='ã
') {
+ pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
+ stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],13, 0);
+ } else if(jaso[1]=='ã
') {
+ pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
+ stem = stem.substring(0,index)+MorphUtil.makeChar(chrs[index],20, 0);
+ } else if(jaso[1]=='ã
') {
+ pomi = MorphUtil.replaceJongsung('ì´',chrs[index])+pomi;
+ stem = stem.substring(0,index);
+ } else if(jaso[1]=='ã
') {
+ pomi = MorphUtil.replaceJongsung('ì ',chrs[index])+pomi;
+ stem = stem.substring(0,index);
+ } else {
+ pomi = "ì"+pomi;
+ }
+ setPomiResult(results,stem,pomi);
+ if(chrs[index]!='ì'&&chrs[index]!='ì¼') return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
+ jaso = MorphUtil.decompose(chrs[index]);
+ }
+
+ char[] nChrs = null;
+ if(index>0) nChrs = MorphUtil.decompose(chrs[index-1]);
+ else nChrs = new char[2];
+
+ if(nChrs.length==2&&chrs[index]=='ì'&&(chrs.length<=index+1||
+ (chrs.length>index+1&&chrs[index+1]!='ì
¨'))) {
+ if(DictionaryUtil.getWord(results[0])!=null) return results; //'ì'ê° í¬í¨ë ë¨ì´ê° ìë¤. ì±ê°ìë¤/ëìë¤/ë¤ì¤ìë¤
+ pomi = chrs[index]+pomi;
+ setPomiResult(results,stem.substring(0,index),pomi);
+ if(--index==0||chrs[index]!='ì¼') return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
+ jaso = MorphUtil.decompose(chrs[index]);
+ }
+
+ if(index>0) nChrs = MorphUtil.decompose(chrs[index-1]);
+ else nChrs = new char[2];
+ if(chrs.length>index+1&&nChrs.length==3&&(chrs[index+1]=='ì
¨'||chrs[index+1]=='ì')&&chrs[index]=='ì¼') {
+ pomi = chrs[index]+pomi;
+ setPomiResult(results,stem.substring(0,index),pomi);
+ }
+
+ return results;
+ }
+
+ /**
+ * ë¶ê·ì¹ ì©ì¸ì ìíì 구íë¤.
+ * @param output
+ * @return
+ * @throws MorphException
+ */
+ public static List irregular(AnalysisOutput output) throws MorphException {
+
+ List results = new ArrayList();
+
+ if(output.getStem()==null||output.getStem().length()==0)
+ return results;
+
+ String ending = output.getEomi();
+ if(output.getPomi()!=null) ending = output.getPomi();
+
+ List<String[]> irrs = new ArrayList();
+
+ irregularStem(irrs,output.getStem(),ending);
+ irregularEnding(irrs,output.getStem(),ending);
+ irregularAO(irrs,output.getStem(),ending);
+
+ try {
+ for(String[] irr: irrs) {
+ AnalysisOutput result = output.clone();
+ result.setStem(irr[0]);
+ if(output.getPatn()==PatternConstants.PTN_VM) {
+ if(output.getPomi()==null) result.setEomi(irr[1]);
+ else result.setPomi(irr[1]);
+ }
+ results.add(result);
+ }
+ } catch (CloneNotSupportedException e) {
+ throw new MorphException(e.getMessage(),e);
+ }
+
+ return results;
+
+ }
+
+ /**
+ * ì´ê°ë§ ë³íë ê²½ì°
+ * @param results
+ * @param stem
+ * @param ending
+ */
+ private static void irregularStem(List results, String stem, String ending) {
+
+ char feCh = ending.charAt(0);
+ char[] fechJaso = MorphUtil.decompose(feCh);
+ char ls = stem.charAt(stem.length()-1);
+ char[] lsJaso = MorphUtil.decompose(ls);
+
+ if(feCh=='ì'||feCh=='ì´'||feCh=='ì¼') {
+ if(lsJaso[lsJaso.length-1]=='ã¹') { // ã· ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),7)
+ ,ending
+ ,String.valueOf(PatternConstants.IRR_TYPE_DI)});
+ } else if(lsJaso.length==2) { // ã
ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),19)
+ ,ending
+ ,String.valueOf(PatternConstants.IRR_TYPE_SI)});
+ }
+ }
+
+ if((fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã
'|| feCh=='ì¤'||feCh=='ì')
+ &&(ls=='ì°')) { // ã
ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),17)
+ ,ending
+ ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
+ }
+
+ if((fechJaso[0]=='ã´'||fechJaso[0]=='ã
'||fechJaso[0]=='ã
'|| feCh=='ì¤')
+ &&(lsJaso.length==2)) { // ã¹ íë½
+
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),8)
+ ,ending
+ ,String.valueOf(PatternConstants.IRR_TYPE_LI)});
+ }
+
+ if(lsJaso.length==2
+ &&(fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã
'||fechJaso[0]=='ã
'||
+ lsJaso[1]=='ã
'||lsJaso[1]=='ã
'||lsJaso[1]=='ã
'||lsJaso[1]=='ã
')
+ &&!"ë".equals(stem)) { // ã
ë¶ê·ì¹, ê·¸ë¬ë [ë³ë¤]ë ã
ë¶ê·ì¹ì´ ìëë¤.
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),27)
+ ,ending
+ ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
+ }
+ }
+
+ /**
+ * ì´ë¯¸ë§ ë³íë ê²½ì°
+ * @param results
+ * @param stem
+ * @param ending
+ */
+ private static void irregularEnding(List results, String stem, String ending) {
+ if(ending.startsWith("ã
")) return;
+
+ char feCh = ending.charAt(0);
+ char ls = stem.charAt(stem.length()-1);
+
+ if(feCh=='ë¬'&&ls=='르') { // 'ë¬' ë¶ê·ì¹
+ results.add(
+ new String[]{stem
+ ,"ì´"+ending.substring(1)
+ ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
+ } else if("ë¼".equals(ending)&&"ê°ê±°".equals(stem)) { // 'ê±°ë¼' ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)
+ ,"ì´ë¼"
+ ,String.valueOf(PatternConstants.IRR_TYPE_GU)});
+ } else if("ë¼".equals(ending)&&"ì¤ë".equals(stem)) { // 'ëë¼' ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)
+ ,"ì´ë¼"
+ ,String.valueOf(PatternConstants.IRR_TYPE_NU)});
+ }
+
+ if("ì¬".equals(ending)&&ls=='í') { // 'ì¬' ë¶ê·ì¹
+ results.add(
+ new String[]{stem
+ ,"ì´"
+ ,String.valueOf(PatternConstants.IRR_TYPE_NU)});
+ }
+ }
+
+ /**
+ * ì´ê°ê³¼ ì´ë¯¸ê° 모ë ë³íë ê²½ì°
+ * @param results
+ * @param stem
+ * @param ending
+ */
+ private static void irregularAO(List results, String stem, String ending) {
+
+ char ls = stem.charAt(stem.length()-1);
+ char[] lsJaso = MorphUtil.decompose(ls);
+
+ if(lsJaso.length<2) return;
+
+ if(lsJaso[1]=='ã
') {
+ if(stem.endsWith("ëì")||stem.endsWith("ê³ ì")) { // 'ê³±ë¤', 'ëë¤'ì 'ã
' ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-2)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã
'
+ ,makeTesnseEomi("ì",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
+ }else { // 'ì' ì¶ì½
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),8,0) // ìì + ã
+ ,makeTesnseEomi("ì",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
+ }
+ } else if(stem.endsWith("í¼")) {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0) // ìì + -
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
+ } else if(lsJaso[1]=='ã
') {
+ if(stem.length()>=2) // 'ã
' ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-2)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã
'
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
+
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),13,0) // ìì + ã
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
+ } else if(stem.length()>=2&&ls=='ë¼') {
+ char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
+ if(ns.length==3&&ns[2]=='ã¹') { // 르 ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-2)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
+ ,makeTesnseEomi("ì",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
+ }
+ } else if(stem.length()>=2&&ls=='ë¬') {
+ char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
+ if(stem.charAt(stem.length()-2)=='르') { // ë¬ ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_LO)});
+ } else if(ns.length==3&&ns[2]=='ã¹') { // 르 ë¶ê·ì¹
+ results.add(
+ new String[]{stem.substring(0,stem.length()-2)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
+ }
+ } else if(stem.endsWith("í´")||stem.endsWith("ì¼")) {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
+ } else if(stem.endsWith("í´")) {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),0,0)
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
+ } else if(lsJaso.length==2&&lsJaso[1]=='ã
') {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_UO)});
+ } else if(lsJaso.length==2&&lsJaso[1]=='ã
') {
+ // ì¼ íë½
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_UO)});
+ // ì ë¶ê·ì¹
+ results.add(
+ new String[]{stem
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_AH)});
+ } else if(lsJaso[1]=='ã
') {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
+ } else if(lsJaso[1]=='ã
') {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),11,0)
+ ,makeTesnseEomi("ì´",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_OE)});
+ } else if(lsJaso[1]=='ã
') {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),0,27)
+ ,makeTesnseEomi("ì",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
+ } else if(lsJaso[1]=='ã
') {
+ results.add(
+ new String[]{stem.substring(0,stem.length()-1)+
+ MorphUtil.makeChar(stem.charAt(stem.length()-1),2,27)
+ ,makeTesnseEomi("ì",ending)
+ ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
+ }
+ }
+
+ /**
+ * ìì ì ì´ë¯¸ë§ì ë§ë¤ì´ì ë°ííë¤.
+ * @param preword 'ì' ëë 'ì´'
+ * @param endword ì´ë¯¸[ì ì´ë¯¸ë§ì í¬í¨]
+ * @return 'ì' ëë 'ì'ì ë§ë¤ì´ì ë°ííë¤.
+ */
+ public static String makeTesnseEomi(String preword, String endword) {
+
+ if(preword==null||preword.length()==0) return endword;
+ if(endword==null||endword.length()==0) return preword;
+
+ if(endword.charAt(0)=='ã
') {
+ return preword.substring(0,preword.length()-1)+
+ MorphUtil.makeChar(preword.charAt(preword.length()-1),20)+endword.substring(1,endword.length());
+ } else if(endword.charAt(0)=='ã´') {
+ return preword.substring(0,preword.length()-1)+
+ MorphUtil.makeChar(preword.charAt(preword.length()-1),4)+endword.substring(1,endword.length());
+ } else if(endword.charAt(0)=='ã¹') {
+ return preword.substring(0,preword.length()-1)+
+ MorphUtil.makeChar(preword.charAt(preword.length()-1),8)+endword.substring(1,endword.length());
+ } else if(endword.charAt(0)=='ã
') {
+ return preword.substring(0,preword.length()-1)+
+ MorphUtil.makeChar(preword.charAt(preword.length()-1),16)+endword.substring(1,endword.length());
+ } else if(endword.charAt(0)=='ã
') {
+ return preword.substring(0,preword.length()-1)+
+ MorphUtil.makeChar(preword.charAt(preword.length()-1),17)+endword.substring(1,endword.length());
+ }
+ return preword+endword;
+ }
/**
* 'ì/기' + 'ì´' + ì´ë¯¸, 'ìì/ë¶í°/ììë¶í°' + 'ì´' + ì´ë¯¸ ì¸ì§ ì¡°ì¬íë¤.
@@ -537,129 +533,128 @@ public class EomiUtil {
* @return
*/
public static boolean endsWithEEomi(String stem) {
- int len = stem.length();
- if(len<2||!stem.endsWith("ì´")) return false;
-
- char[] jasos = MorphUtil.decompose(stem.charAt(len-2));
- if(jasos.length==3&&jasos[2]=='ã
')
- return true;
- else {
- int index = stem.lastIndexOf("기");
- if(index==-1) index = stem.lastIndexOf("ìì");
- if(index==-1) index = stem.lastIndexOf("ë¶í°");
- if(index==-1) return false;
- return true;
- }
+ int len = stem.length();
+ if(len<2||!stem.endsWith("ì´")) return false;
+
+ char[] jasos = MorphUtil.decompose(stem.charAt(len-2));
+ if(jasos.length==3&&jasos[2]=='ã
')
+ return true;
+ else {
+ int index = stem.lastIndexOf("기");
+ if(index==-1) index = stem.lastIndexOf("ìì");
+ if(index==-1) index = stem.lastIndexOf("ë¶í°");
+ if(index==-1) return false;
+ return true;
+ }
}
- private static void setPomiResult(String[] results,String stem, String pomi ) {
- results[0] = stem;
- results[1] = pomi;
- }
-
- /**
- *
- * @param ch
- * @return
- */
- public static boolean IsNLMBSyl(char ech, char lch) throws MorphException {
-
- char[] features = SyllableUtil.getFeature(ech);
-
- switch(lch) {
-
- case 'ã´' :
- return (features[SyllableUtil.IDX_YNPNA]=='1' || features[SyllableUtil.IDX_YNPLN]=='1');
- case 'ã¹' :
- return (features[SyllableUtil.IDX_YNPLA]=='1');
- case 'ã
' :
- return (features[SyllableUtil.IDX_YNPMA]=='1');
- case 'ã
' :
- return (features[SyllableUtil.IDX_YNPBA]=='1');
- }
-
- return false;
- }
-
- /**
- * ì´ë¯¸ë¥¼ ë¶ë¦¬íë¤.
- *
- * 1. ê·ì¹ì©ì¸ê³¼ ì´ê°ë§ ë°ëë ë¶ê·ì¹ ì©ì¸
- * 2. ì´ë¯¸ê° ì¢
ì± 'ã´/ã¹/ã
/ã
'ì¼ë¡ ììëë ì´ì
- * 3. 'ì¬/ê±°ë¼/ëë¼'ì ë¶ê·ì¹ ì´ì
- * 4. ì´ë¯¸ 'ì/ì´'ê° íë½ëë ì´ì
- * 5. 'ì/ì´'ì ë³ì´ì²´ ë¶ë¦¬
- *
- * @param stem
- * @param end
- * @return
- * @throws MorphException
- */
- public static String[] splitEomi(String stem, String end) throws MorphException {
-
- String[] strs = new String[2];
- int strlen = stem.length();
- if(strlen==0) return strs;
-
- char estem = stem.charAt(strlen-1);
- char[] chrs = MorphUtil.decompose(estem);
- if(chrs.length==1) return strs; // íê¸ì´ ìëë¼ë©´...
-
- if((chrs.length==3)&&(chrs[2]=='ã´'||chrs[2]=='ã¹'||chrs[2]=='ã
'||chrs[2]=='ã
')&&
- EomiUtil.IsNLMBSyl(estem,chrs[2])&&
- DictionaryUtil.combineAndEomiCheck(chrs[2], end)!=null) {
- strs[1] = Character.toString(chrs[2]);
- if(end.length()>0) strs[1] += end;
- strs[0] = stem.substring(0,strlen-1) + MorphUtil.makeChar(estem, 0);
- } else if(estem=='í´'&&DictionaryUtil.existEomi("ì´"+end)) {
- strs[0] = stem.substring(0,strlen-1)+"í";
- strs[1] = "ì´"+end;
- } else if(estem=='í'&&DictionaryUtil.existEomi("ì´"+end)) {
- strs[0] = stem.substring(0,strlen-1)+"í";
- strs[1] = "ì´"+end;
- } else if(chrs[0]!='ã
'&&
- (chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
')&&
- (chrs.length==2 || SyllableUtil.getFeature(estem)[SyllableUtil.IDX_YNPAH]=='1')&&
- (DictionaryUtil.combineAndEomiCheck('ì´', end)!=null)) {
-
- strs[0] = stem;
- if(chrs.length==2) strs[1] = "ì´"+end;
- else strs[1] = end;
- } else if(stem.endsWith("í")&&"ì¬".equals(end)) {
- strs[0] = stem;
- strs[1] = "ì´";
- }else if((chrs.length==2)&&(chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
')&&
- (DictionaryUtil.combineAndEomiCheck('ì´', end)!=null)) {
-
- StringBuffer sb = new StringBuffer();
-
- if(strlen>1) sb.append(stem.substring(0,strlen-1));
-
- if(chrs[1]=='ã
')
- sb.append(MorphUtil.makeChar(estem, 8, 0)).append(MorphUtil.replaceJongsung('ì',estem));
- else if(chrs[1]=='ã
')
- sb.append(MorphUtil.makeChar(estem, 13, 0)).append(MorphUtil.replaceJongsung('ì´',estem));
- else if(chrs[1]=='ã
')
- sb.append(MorphUtil.makeChar(estem, 11, 0)).append(MorphUtil.replaceJongsung('ì´',estem));
- else if(chrs[1]=='ã
')
- sb.append(Character.toString(MorphUtil.makeChar(estem, 20, 0))).append(MorphUtil.replaceJongsung('ì´',estem));
- else if(chrs[1]=='ã
')
- sb.append(MorphUtil.makeChar(estem, 0, 0)).append(MorphUtil.replaceJongsung('ì´',estem));
- else if(chrs[1]=='ã
')
- sb.append(MorphUtil.makeChar(estem, 20, 0)).append(MorphUtil.replaceJongsung('ì ',estem));
-
-
- strs[0] = sb.toString();
-
- end = strs[0].substring(strs[0].length()-1)+end;
- strs[0] = strs[0].substring(0,strs[0].length()-1);
-
- strs[1] = end;
-
- }else if(!"".equals(end)&&DictionaryUtil.existEomi(end)) {
- strs = new String[]{stem, end};
- }
+ private static void setPomiResult(String[] results,String stem, String pomi ) {
+ results[0] = stem;
+ results[1] = pomi;
+ }
+
+ /**
+ *
+ * @param ch
+ * @return
+ */
+ public static boolean IsNLMBSyl(char ech, char lch) throws MorphException {
+
+ char[] features = SyllableUtil.getFeature(ech);
+
+ switch(lch) {
+
+ case 'ã´' :
+ return (features[SyllableUtil.IDX_YNPNA]=='1' || features[SyllableUtil.IDX_YNPLN]=='1');
+ case 'ã¹' :
+ return (features[SyllableUtil.IDX_YNPLA]=='1');
+ case 'ã
' :
+ return (features[SyllableUtil.IDX_YNPMA]=='1');
+ case 'ã
' :
+ return (features[SyllableUtil.IDX_YNPBA]=='1');
+ }
+
+ return false;
+ }
+
+ /**
+ * ì´ë¯¸ë¥¼ ë¶ë¦¬íë¤.
+ *
+ * 1. ê·ì¹ì©ì¸ê³¼ ì´ê°ë§ ë°ëë ë¶ê·ì¹ ì©ì¸
+ * 2. ì´ë¯¸ê° ì¢
ì± 'ã´/ã¹/ã
/ã
'ì¼ë¡ ììëë ì´ì
+ * 3. 'ì¬/ê±°ë¼/ëë¼'ì ë¶ê·ì¹ ì´ì
+ * 4. ì´ë¯¸ 'ì/ì´'ê° íë½ëë ì´ì
+ * 5. 'ì/ì´'ì ë³ì´ì²´ ë¶ë¦¬
+ *
+ * @param stem
+ * @param end
+ * @return
+ * @throws MorphException
+ */
+ public static String[] splitEomi(String stem, String end) throws MorphException {
+
+ String[] strs = new String[2];
+ int strlen = stem.length();
+ if(strlen==0) return strs;
+
+ char estem = stem.charAt(strlen-1);
+ char[] chrs = MorphUtil.decompose(estem);
+ if(chrs.length==1) return strs; // íê¸ì´ ìëë¼ë©´...
+
+ if((chrs.length==3)&&(chrs[2]=='ã´'||chrs[2]=='ã¹'||chrs[2]=='ã
'||chrs[2]=='ã
')&&
+ EomiUtil.IsNLMBSyl(estem,chrs[2])&&
+ DictionaryUtil.combineAndEomiCheck(chrs[2], end)!=null) {
+ strs[1] = Character.toString(chrs[2]);
+ if(end.length()>0) strs[1] += end;
+ strs[0] = stem.substring(0,strlen-1) + MorphUtil.makeChar(estem, 0);
+ } else if(estem=='í´'&&DictionaryUtil.existEomi("ì´"+end)) {
+ strs[0] = stem.substring(0,strlen-1)+"í";
+ strs[1] = "ì´"+end;
+ } else if(estem=='í'&&DictionaryUtil.existEomi("ì´"+end)) {
+ strs[0] = stem.substring(0,strlen-1)+"í";
+ strs[1] = "ì´"+end;
+ } else if(chrs[0]!='ã
'&&
+ (chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
')&&
+ (chrs.length==2 || SyllableUtil.getFeature(estem)[SyllableUtil.IDX_YNPAH]=='1')&&
+ (DictionaryUtil.combineAndEomiCheck('ì´', end)!=null)) {
+
+ strs[0] = stem;
+ if(chrs.length==2) strs[1] = "ì´"+end;
+ else strs[1] = end;
+ } else if(stem.endsWith("í")&&"ì¬".equals(end)) {
+ strs[0] = stem;
+ strs[1] = "ì´";
+ }else if((chrs.length==2)&&(chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
')&&
+ (DictionaryUtil.combineAndEomiCheck('ì´', end)!=null)) {
+
+ StringBuffer sb = new StringBuffer();
+
+ if(strlen>1) sb.append(stem.substring(0,strlen-1));
+
+ if(chrs[1]=='ã
')
+ sb.append(MorphUtil.makeChar(estem, 8, 0)).append(MorphUtil.replaceJongsung('ì',estem));
+ else if(chrs[1]=='ã
')
+ sb.append(MorphUtil.makeChar(estem, 13, 0)).append(MorphUtil.replaceJongsung('ì´',estem));
+ else if(chrs[1]=='ã
')
+ sb.append(MorphUtil.makeChar(estem, 11, 0)).append(MorphUtil.replaceJongsung('ì´',estem));
+ else if(chrs[1]=='ã
')
+ sb.append(Character.toString(MorphUtil.makeChar(estem, 20, 0))).append(MorphUtil.replaceJongsung('ì´',estem));
+ else if(chrs[1]=='ã
')
+ sb.append(MorphUtil.makeChar(estem, 0, 0)).append(MorphUtil.replaceJongsung('ì´',estem));
+ else if(chrs[1]=='ã
')
+ sb.append(MorphUtil.makeChar(estem, 20, 0)).append(MorphUtil.replaceJongsung('ì ',estem));
+
+ strs[0] = sb.toString();
+
+ end = strs[0].substring(strs[0].length()-1)+end;
+ strs[0] = strs[0].substring(0,strs[0].length()-1);
+
+ strs[1] = end;
+
+ }else if(!"".equals(end)&&DictionaryUtil.existEomi(end)) {
+ strs = new String[]{stem, end};
+ }
- return strs;
- }
+ return strs;
+ }
}