You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/18 19:20:27 UTC
svn commit: r1533562 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko:
morph/ utils/
Author: rmuir
Date: Fri Oct 18 17:20:26 2013
New Revision: 1533562
URL: http://svn.apache.org/r1533562
Log:
LUCENE-4956: remove some dead code
Removed:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AbbrevAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounProperty.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/SpaceOutput.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/Status.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSAOutput.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSCandidateComparator.java
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java Fri Oct 18 17:20:26 2013
@@ -347,46 +347,6 @@ public class CompoundNounAnalyzer {
return maxlen;
}
- private int evaluation(List<CompoundEntry> candidates) {
-
- int eval = 10;
-
- int one = 0;
- int exist = 0;
-
- for(CompoundEntry entry : candidates) {
- if(entry.getWord().length()==1) one++;
- if(entry.isExist()) exist++;
- }
-
- if(one>3) return eval;
-
- eval = eval + (exist*100)/candidates.size() - (one*100)/candidates.size();
-
- return eval;
- }
-
- private boolean containWord(String before, String input, int pos) {
-
- String prev = null;
- for(int i=pos;i<input.length();i++) {
-
- String text = before+input.substring(pos,i+1);
- if(DictionaryUtil.findWithPrefix(text).hasNext()) {
- prev = text;
- continue;
- }
-
- if(prev!=null&&DictionaryUtil.getNoun(prev)!=null) return true;
-
- break;
- }
-
- return false;
-
- }
-
-
private CompoundEntry[] analysisBySplited(int[] units, String input, boolean isFirst) {
CompoundEntry[] entries = new CompoundEntry[units.length];
@@ -410,18 +370,6 @@ public class CompoundNounAnalyzer {
}
- private boolean canCompound(CompoundEntry[] entries, int thredhold) {
-
- int achived = 0;
- for(int i=0;i<entries.length;i++) {
- if(entries[i].isExist()) achived += score;
- }
-
- if(achived<thredhold) return false;
-
- return true;
- }
-
/**
* 입력된 String 을 CompoundEntry 로 변환
* @param input input
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java Fri Oct 18 17:20:26 2013
@@ -394,18 +394,6 @@ public class MorphAnalyzer {
VerbUtil.analysisVMXM(o.clone(), candidates);
}
- public void analysisCNoun(List<AnalysisOutput> candidates) {
-
- boolean success = false;
- for(AnalysisOutput o: candidates) {
- if(o.getPos()!=PatternConstants.POS_NOUN) continue;
- if(o.getScore()==AnalysisOutput.SCORE_CORRECT)
- success=true;
- else if(!success)
- confirmCNoun(o);
- }
- }
-
/**
* 복합명사인지 조사하고, 복합명사이면 단위명사들을 찾는다.
* 복합명사인지 여부는 단위명사가 모두 사전에 있는지 여부로 판단한다.
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java Fri Oct 18 17:20:26 2013
@@ -101,49 +101,6 @@ public class ConstraintUtil {
if(hahes.get(key)!=null) return true;
return false;
}
-
- /**
- * 어미가 ㄴ,ㄹ,ㅁ 으로 끝나는지 조사한다.
- */
- public static boolean isNLM(String eomi) {
-
- if(eomi==null || "".equals(eomi)) return false;
-
- if(eomiPnouns.get(eomi)!=null) return true;
-
- char[] chrs = MorphUtil.decompose(eomi.charAt(eomi.length()-1));
- if(chrs.length==3 && eomiPnouns.get(Character.toString(chrs[2]))!=null) return true;
-
- return true;
- }
-
- public static boolean isEomiPhrase(int ptn) {
-
- if(PTN_MLIST.get(ptn)!=null) return true;
-
- return false;
- }
-
- public static boolean isJosaNounPhrase(int ptn) {
-
- if(PTN_JLIST.get(ptn)!=null) return true;
-
- return false;
- }
-
- public static boolean isJosaAdvPhrase(int ptn) {
-
- if(PatternConstants.PTN_ADVJ==ptn) return true;
-
- return false;
- }
-
- public static boolean isAdvPhrase(int ptn) {
-
- if(PatternConstants.PTN_ADVJ==ptn || PatternConstants.PTN_AID==ptn) return true;
-
- return false;
- }
public static boolean isTwoJosa(String josa) {
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java Fri Oct 18 17:20:26 2013
@@ -17,12 +17,7 @@ package org.apache.lucene.analysis.ko.ut
* limitations under the License.
*/
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
-import org.apache.lucene.analysis.ko.morph.AnalysisOutput;
-import org.apache.lucene.analysis.ko.morph.PatternConstants;
public class EomiUtil {
private EomiUtil() {}
@@ -36,91 +31,6 @@ public class EomiUtil {
};
/**
- * 가장 길이가 긴 어미를 분리한다.
- * @param term term
- */
- public static String[] longestEomi(String term) {
-
- String[] result = new String[2];
- result[0] = term;
-
- String stem;
- String eomi;
- char[] efeature;
-
- for(int i=term.length();i>0;i--) {
-
- stem = term.substring(0,i);
-
- if(i!=term.length()) {
- eomi = term.substring(i);
- efeature = SyllableUtil.getFeature(eomi.charAt(0));
- } else {
- efeature = SyllableUtil.getFeature(stem.charAt(i-1));
- eomi="";
- }
-
- if(SyllableUtil.isAlpanumeric(stem.charAt(i-1))) break;
-
- char[] jasos = MorphUtil.decompose(stem.charAt(i-1));
-
- if(!"".equals(eomi)&&!DictionaryUtil.existEomi(eomi)) {
- // do not anything.
- } else if(jasos.length>2&&
- (jasos[2]=='ã´'||jasos[2]=='ã¹'||jasos[2]=='ã
'||jasos[2]=='ã
')&&
- DictionaryUtil.combineAndEomiCheck(jasos[2], eomi)!=null) {
- result[0] = Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 0));
- if(i!=0) result[0] = stem.substring(0,i-1)+result[0];
- result[1] = Character.toString(jasos[2]);
- }else if(i>0&&(stem.endsWith("í")&&"ì¬".equals(eomi))||
- (stem.endsWith("ê°")&&"ê±°ë¼".equals(eomi))||
- (stem.endsWith("ì¤")&&"ëë¼".equals(eomi))) {
- result[0] = stem;
- result[1] = eomi;
- }else if(jasos.length==2&&(!stem.endsWith("ì")&&!stem.endsWith("ì´"))&&
- (jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
')&&
- (DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {
- char[] chs = MorphUtil.decompose(stem.charAt(stem.length()-1));
- result[0] = stem;
- result[1] = "ì´"+eomi;
- }else if((jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
'||jasos[1]=='ã
')&&
- (DictionaryUtil.combineAndEomiCheck('ì´', eomi)!=null)) {
- String end = "";
- if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 8, 0)+"ì";
- else if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 13, 0)+"ì´";
- else if(jasos[1]=='ã
')
- end=Character.toString(MorphUtil.makeChar(stem.charAt(i-1), 6, 0));
- else if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 0, 0)+"ì´";
- else if(jasos[1]=='ã
')
- end=MorphUtil.makeChar(stem.charAt(i-1), 20, 0)+"ì ";
-
- if(jasos.length==3) {
- end = end.substring(0,end.length()-1)+MorphUtil.replaceJongsung(end.charAt(end.length()-1),stem.charAt(i-1));
- }
-
- if(stem.length()<2) result[0] = end;
- else result[0] = stem.substring(0,stem.length()-1)+end;
- result[1] = eomi;
-
- }else if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI1]!='0'&&
- DictionaryUtil.existEomi(eomi)) {
- if(!(((jasos.length==2&&jasos[0]=='ã¹')||(jasos.length==3&&jasos[2]=='ã¹'))&&eomi.equals("ë¬"))) { // ã¹ ë¶ê·ì¹ì ìì¸
- result[0] = stem;
- result[1] = eomi;
- }
- }
-
- if(efeature!=null&&efeature[SyllableUtil.IDX_EOMI2]=='0') break;
- }
-
- return result;
-
- }
-
- /**
* 선어말어미를 분석한다.
*/
public static String[] splitPomi(String stem) {
@@ -230,311 +140,6 @@ public class EomiUtil {
return results;
}
-
- /**
- * 불규칙 용언의 원형을 구한다.
- */
- public static List<AnalysisOutput> irregular(AnalysisOutput output) {
-
- List<AnalysisOutput> results = new ArrayList<AnalysisOutput>();
-
- if(output.getStem()==null||output.getStem().length()==0)
- return results;
-
- String ending = output.getEomi();
- if(output.getPomi()!=null) ending = output.getPomi();
-
- List<String[]> irrs = new ArrayList<String[]>();
-
- irregularStem(irrs,output.getStem(),ending);
- irregularEnding(irrs,output.getStem(),ending);
- irregularAO(irrs,output.getStem(),ending);
-
- for(String[] irr: irrs) {
- AnalysisOutput result = output.clone();
- result.setStem(irr[0]);
- if(output.getPatn()==PatternConstants.PTN_VM) {
- if(output.getPomi()==null) result.setEomi(irr[1]);
- else result.setPomi(irr[1]);
- }
- results.add(result);
- }
-
- return results;
-
- }
-
- /**
- * 어간만 변하는 경우
- * @param results results
- * @param stem stem
- * @param ending ending
- */
- private static void irregularStem(List<String[]> results, String stem, String ending) {
-
- char feCh = ending.charAt(0);
- char[] fechJaso = MorphUtil.decompose(feCh);
- char ls = stem.charAt(stem.length()-1);
- char[] lsJaso = MorphUtil.decompose(ls);
-
- if(feCh=='ì'||feCh=='ì´'||feCh=='ì¼') {
- if(lsJaso[lsJaso.length-1]=='ã¹') { // ã· ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),7)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_DI)});
- } else if(lsJaso.length==2) { // ã
ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),19)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_SI)});
- }
- }
-
- if((fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã
'|| feCh=='ì¤'||feCh=='ì')
- &&(ls=='ì°')) { // ã
ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),17)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
- }
-
- if((fechJaso[0]=='ã´'||fechJaso[0]=='ã
'||fechJaso[0]=='ã
'|| feCh=='ì¤')
- &&(lsJaso.length==2)) { // ã¹ íë½
-
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),8)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_LI)});
- }
-
- if(lsJaso.length==2
- &&(fechJaso[0]=='ã´'||fechJaso[0]=='ã¹'||fechJaso[0]=='ã
'||fechJaso[0]=='ã
'||
- lsJaso[1]=='ã
'||lsJaso[1]=='ã
'||lsJaso[1]=='ã
'||lsJaso[1]=='ã
')
- &&!"ë".equals(stem)) { // ã
ë¶ê·ì¹, ê·¸ë¬ë [ë³ë¤]ë ã
ë¶ê·ì¹ì´ ìëë¤.
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),27)
- ,ending
- ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
- }
- }
-
- /**
- * 어미만 변하는 경우
- * @param results results
- * @param stem stem
- * @param ending ending
- */
- private static void irregularEnding(List<String[]> results, String stem, String ending) {
- if(ending.startsWith("ã
")) return;
-
- char feCh = ending.charAt(0);
- char ls = stem.charAt(stem.length()-1);
-
- if(feCh=='ë¬'&&ls=='르') { // 'ë¬' ë¶ê·ì¹
- results.add(
- new String[]{stem
- ,"ì´"+ending.substring(1)
- ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
- } else if("ë¼".equals(ending)&&"ê°ê±°".equals(stem)) { // 'ê±°ë¼' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)
- ,"ì´ë¼"
- ,String.valueOf(PatternConstants.IRR_TYPE_GU)});
- } else if("ë¼".equals(ending)&&"ì¤ë".equals(stem)) { // 'ëë¼' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)
- ,"ì´ë¼"
- ,String.valueOf(PatternConstants.IRR_TYPE_NU)});
- }
-
- if("ì¬".equals(ending)&&ls=='í') { // 'ì¬' ë¶ê·ì¹
- results.add(
- new String[]{stem
- ,"ì´"
- ,String.valueOf(PatternConstants.IRR_TYPE_NU)});
- }
- }
-
- /**
- * 어간과 어미가 모두 변하는 경우
- * @param results results
- * @param stem stem
- * @param ending ending
- */
- private static void irregularAO(List<String[]> results, String stem, String ending) {
-
- char ls = stem.charAt(stem.length()-1);
- char[] lsJaso = MorphUtil.decompose(ls);
-
- if(lsJaso.length<2) return;
-
- if(lsJaso[1]=='ã
') {
- if(stem.endsWith("ëì")||stem.endsWith("ê³ ì")) { // 'ê³±ë¤', 'ëë¤'ì 'ã
' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã
'
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
- }else { // 'ì' ì¶ì½
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),8,0) // ìì + ã
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
- }
- } else if(stem.endsWith("í¼")) {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0) // ìì + -
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
- } else if(lsJaso[1]=='ã
') {
- if(stem.length()>=2) // 'ã
' ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),17) // + 'ã
'
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_BI)});
-
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),13,0) // ìì + ã
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_WA)});
- } else if(stem.length()>=2&&ls=='ë¼') {
- char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
- if(ns.length==3&&ns[2]=='ã¹') { // 르 ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
- }
- } else if(stem.length()>=2&&ls=='ë¬') {
- char[] ns = MorphUtil.decompose(stem.charAt(stem.length()-2));
- if(stem.charAt(stem.length()-2)=='르') { // ë¬ ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-1)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_LO)});
- } else if(ns.length==3&&ns[2]=='ã¹') { // 르 ë¶ê·ì¹
- results.add(
- new String[]{stem.substring(0,stem.length()-2)+
- MorphUtil.makeChar(stem.charAt(stem.length()-2),0) + "르"
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_RO)});
- }
- } else if(stem.endsWith("í´")||stem.endsWith("ì¼")) {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
- } else if(stem.endsWith("í´")) {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),0,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
- } else if(lsJaso.length==2&&lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_UO)});
- } else if(lsJaso.length==2&&lsJaso[1]=='ã
') {
- // ì¼ íë½
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),18,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_UO)});
- // ì ë¶ê·ì¹
- results.add(
- new String[]{stem
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_AH)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),20,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_EI)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),11,0)
- ,makeTesnseEomi("ì´",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_OE)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),0,27)
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
- } else if(lsJaso[1]=='ã
') {
- results.add(
- new String[]{stem.substring(0,stem.length()-1)+
- MorphUtil.makeChar(stem.charAt(stem.length()-1),2,27)
- ,makeTesnseEomi("ì",ending)
- ,String.valueOf(PatternConstants.IRR_TYPE_HI)});
- }
- }
-
- /**
- * ìì ì ì´ë¯¸ë§ì ë§ë¤ì´ì ë°ííë¤.
- * @param preword 'ì' ëë 'ì´'
- * @param endword ì´ë¯¸[ì ì´ë¯¸ë§ì í¬í¨]
- * return 'ì' ëë 'ì'ì ë§ë¤ì´ì ë°ííë¤.
- */
- public static String makeTesnseEomi(String preword, String endword) {
-
- if(preword==null||preword.length()==0) return endword;
- if(endword==null||endword.length()==0) return preword;
-
- if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),20)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã´') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),4)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã¹') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),8)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),16)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- MorphUtil.makeChar(preword.charAt(preword.length()-1),17)+endword.substring(1,endword.length());
- }
- return preword+endword;
- }
-
- /**
- * '음/기' + '이' + 어미, '에서/부터/에서부터' + '이' + 어미 인지 조사한다.
- */
- public static boolean endsWithEEomi(String stem) {
- int len = stem.length();
- if(len<2||!stem.endsWith("ì´")) return false;
-
- char[] jasos = MorphUtil.decompose(stem.charAt(len-2));
- if(jasos.length==3&&jasos[2]=='ã
')
- return true;
- else {
- int index = stem.lastIndexOf("기");
- if(index==-1) index = stem.lastIndexOf("ìì");
- if(index==-1) index = stem.lastIndexOf("ë¶í°");
- if(index==-1) return false;
- return true;
- }
- }
private static void setPomiResult(String[] results,String stem, String pomi ) {
results[0] = stem;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/MorphUtil.java Fri Oct 18 17:20:26 2013
@@ -19,10 +19,8 @@ package org.apache.lucene.analysis.ko.ut
import java.util.List;
-import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
import org.apache.lucene.analysis.ko.morph.AnalysisOutput;
import org.apache.lucene.analysis.ko.morph.PatternConstants;
-import org.apache.lucene.analysis.ko.morph.WordEntry;
public class MorphUtil {
private MorphUtil() {}
@@ -136,58 +134,6 @@ public class MorphUtil {
}
}
-
- /**
- * 용언 + '음/기' + '이' + 어미, 체언 + '에서/부터/에서부터' + '이' + 어미
- * @param output the output text
- * @param candidates the candidates
- */
- public static void buildPtnCM(AnalysisOutput output, List<AnalysisOutput> candidates) {
-
- char ch = output.getStem().charAt(output.getStem().length()-2);
- char[] jasos = MorphUtil.decompose(ch);
- if(jasos.length==3||ch=='기') {
- buildPtnVMCM(output,candidates);
- } else {
-
- }
- }
-
- private static void buildPtnVMCM(AnalysisOutput output, List<AnalysisOutput> candidates) {
- String stem = output.getStem();
-
- output.setPatn(PatternConstants.PTN_VMCM);
- output.setPos(PatternConstants.POS_VERB);
-
- char ch = stem.charAt(stem.length()-2);
- char[] jasos = MorphUtil.decompose(ch);
-
- if(ch=='기') {
- output.addElist("기");
- output.addElist("ì´");
- output.setStem(stem.substring(0,stem.length()-2));
-
- if(DictionaryUtil.getVerb(output.getStem())!=null)
- candidates.add(output);
- }else if(jasos[2]=='ã
') {
- if(stem.length()>1) stem = stem.substring(0,stem.length()-2);
- stem += MorphUtil.makeChar(ch, 0);
- output.addElist("ã
");
- output.addElist("ì´");
- output.setStem(stem);
-
- if(DictionaryUtil.getVerb(stem)!=null)
- candidates.add(output);
- else {
- String[] morphs = IrregularUtil.restoreIrregularVerb(stem,"ã
");
- if(morphs!=null) {
- output.setScore(AnalysisOutput.SCORE_CORRECT);
- output.setStem(morphs[0]);
- candidates.add(output);
- }
- }
- }
- }
public static boolean hasVerbOnly(String input) {
@@ -197,43 +143,4 @@ public class MorphUtil {
}
return false;
}
-
- /**
- * ìì ì ì´ë¯¸ë§ì ë§ë¤ì´ì ë°ííë¤.
- * @param preword 'ì' ëë 'ì´'
- * @param endword ì´ë¯¸[ì ì´ë¯¸ë§ì í¬í¨]
- * @return 'ì' ëë 'ì'ì ë§ë¤ì´ì ë°ííë¤.
- */
- public static String makeTesnseEomi(String preword, String endword) {
-
- if(preword==null||preword.length()==0) return endword;
- if(endword==null||endword.length()==0) return preword;
-
- if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- makeChar(preword.charAt(preword.length()-1),20)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã´') {
- return preword.substring(0,preword.length()-1)+
- makeChar(preword.charAt(preword.length()-1),4)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã¹') {
- return preword.substring(0,preword.length()-1)+
- makeChar(preword.charAt(preword.length()-1),8)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- makeChar(preword.charAt(preword.length()-1),16)+endword.substring(1,endword.length());
- } else if(endword.charAt(0)=='ã
') {
- return preword.substring(0,preword.length()-1)+
- makeChar(preword.charAt(preword.length()-1),17)+endword.substring(1,endword.length());
- }
- return preword+endword;
- }
-
- /**
- * ì©ì¸íì 미ì¬ê° ê²°í©ë ì ìëì§ ì¬ë¶ë¥¼ ì ê²íë¤.
- * í¹í ì¬ì ì ë±ë¡ë ëë¤, íë¤í ì ì ìì´ ê°ë¥íì§ë¥¼ ì¡°ì¬íë¤.
- */
- public static boolean isValidSuffix(WordEntry entry, AnalysisOutput o) {
-
- return true;
- }
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java Fri Oct 18 17:20:26 2013
@@ -216,121 +216,6 @@ public class NounUtil {
return true;
}
-
- /**
- * ë³µí©ëª
ì¬ì¸ì§ ì¡°ì¬íê³ , ë³µí©ëª
ì¬ì´ë©´ ë¨ìëª
ì¬ë¤ì ì°¾ëë¤.
- * ë³µí©ëª
ì¬ì¸ì§ ì¬ë¶ë ë¨ìëª
ì¬ê° 모ë ì¬ì ì ìëì§ ì¬ë¶ë¡ íë¨íë¤.
- * ë¨ìëª
ì¬ë 2ê¸ì ì´ì ë¨ì´ììë§ ì°¾ëë¤.
- * @param o
- * @throws MorphException
- */
-// public static boolean confirmCNoun(AnalysisOutput o) {
-//
-// if(o.getStem().length()<3) return false;
-// if(o.getPatn()==PatternConstants.PTN_N
-// &&DictionaryUtil.existJosa(o.getStem().substring(o.getStem().length()-2))) return false;
-//
-// List<CompoundEntry> results = new ArrayList();
-// List<List> queue = new ArrayList();
-// String prefix = o.getStem().substring(0,1);
-//
-// int pos = 0;
-// boolean moreTwo = false;
-// while(pos<o.getStem().length()) {
-//
-// List<WordEntry> nList = findNouns(o.getStem().substring(pos),queue.size(),o);
-// if(nList==null) return false;
-//
-// if(pos==0&&DictionaryUtil.existPrefix(prefix)) nList.add(new WordEntry(prefix));
-//
-// if(nList.size()==0) {
-// if(queue.size()==0) return false;
-// List<WordEntry> tmpList = queue.get(queue.size()-1);
-//
-// tmpList.remove(tmpList.size()-1);
-// pos -= results.get(queue.size()-1).getWord().length();
-// if(tmpList.size()==0) {
-// while(tmpList.size()==0) {
-// results.remove(queue.size()-1);
-// queue.remove(tmpList);
-// if(queue.size()==0) return false;
-//
-// tmpList = queue.get(queue.size()-1);
-// tmpList.remove(tmpList.size()-1);
-// if(tmpList.size()==0) continue;
-//
-// pos -= results.get(queue.size()-1).getWord().length();
-// results.set(queue.size()-1, new CompoundEntry(tmpList.get(tmpList.size()-1).getWord(),pos));
-// pos += tmpList.get(tmpList.size()-1).getWord().length();
-//
-// }
-// }else {
-// results.set(queue.size()-1, new CompoundEntry(tmpList.get(tmpList.size()-1).getWord(),pos));
-// pos += tmpList.get(tmpList.size()-1).getWord().length();
-// }
-//
-// } else {
-// queue.add(nList);
-// WordEntry noun = nList.get(nList.size()-1);
-// results.add(new CompoundEntry(noun.getWord(),pos));
-// pos += noun.getWord().length();
-// if(noun.getCompounds().size()>0) o.addCNoun(noun.getCompounds());
-// if(noun.getWord().length()>1) moreTwo=true;
-// }
-// }
-//
-// if(results.size()>1&&DNouns.contains(results.get(results.size()-1).getWord())) {
-// CompoundEntry dnoun = results.remove(results.size()-1);
-// o.setStem(o.getStem().substring(0,o.getStem().length()-dnoun.getWord().length()));
-// o.setNsfx(dnoun.getWord());
-// }
-//
-// if(results.size()>1) o.addCNoun(results);
-//
-// o.setScore(AnalysisOutput.SCORE_CORRECT);
-// return true;
-// }
-
- /**
- * ë³µí©ëª
ì¬ìì ë¨ìëª
ì¬ë¥¼ ë¶ë¦¬í´ë¸ë¤.
- * 리ì¤í¸ì ê°ì¥ ë§ì§ë§ì ìì¹í ë¨ì´ê° ìµì¥ë¨ì´ì´ë¤.
- * @param str ë³µí©ëª
ì¬
- * @param pos the analysing start point
- * @param o ë¶ìê²°ê³¼
- * return ë¨ìëª
ì¬ ë¦¬ì¤í¸
- */
- private static List<WordEntry> findNouns(String str, int pos, AnalysisOutput o) {
-
- List<WordEntry> nList = new ArrayList<WordEntry>();
-
- if(str.length()==2&&DictionaryUtil.existSuffix(str.substring(0,1))&&DNouns.contains(str.substring(1))) {
- o.setStem(o.getStem().substring(0,o.getStem().length()-1));
- o.setNsfx(str.substring(1));
- nList.add(new WordEntry(str.substring(0,1)));
- return nList;
- }else if(str.length()==2&&DictionaryUtil.existSuffix(str.substring(0,1))&&DictionaryUtil.existJosa(str.substring(1))) {
- return null;
- }
-
- if(pos>=2&&DictionaryUtil.existJosa(str)) return null;
-
- if(str.length()==1&&(DictionaryUtil.existSuffix(str)||DNouns.contains(str))) {
- nList.add(new WordEntry(str));
- return nList;
- }
-
- for(int i=1;i<str.length();i++) {
- String sub = str.substring(0,i+1);
- if(!DictionaryUtil.findWithPrefix(sub).hasNext()) break;
- WordEntry entry = DictionaryUtil.getAllNoun(sub);
- if(entry!=null) {
- nList.add(entry);
- }
- }
-
- return nList;
- }
-
/*
* ë§ì§ë§ ìì ì´ ëª
ì¬í ì 미ì¬(ë±,ì..)ì¸ì§ ì¡°ì¬íë¤.
*/
@@ -355,14 +240,6 @@ public class NounUtil {
return true;
}
-
-// public static int endsWithDNoun(String stem) {
-// for(int i = 0; i < DNouns.length; i++)
-// if(stem.endsWith(DNouns[i]))
-// return stem.lastIndexOf(DNouns[i]);
-//
-// return -1;
-// }
public static boolean endsWith2Josa(String input) {
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/SyllableUtil.java Fri Oct 18 17:20:26 2013
@@ -113,8 +113,4 @@ public class SyllableUtil {
return getFeature(idx);
}
-
- public static boolean isAlpanumeric(char ch) {
- return (ch>='0'&&ch<='z');
- }
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java?rev=1533562&r1=1533561&r2=1533562&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/VerbUtil.java Fri Oct 18 17:20:26 2013
@@ -81,16 +81,6 @@ public class VerbUtil {
}
- public static boolean constraintVerb(String start, String end) {
-
- char[] schs = MorphUtil.decompose(start.charAt(start.length()-1));
- char[] echs = MorphUtil.decompose(end.charAt(0));
-
- if(schs.length==3&&schs[2]=='ã¹'&&echs[0]=='ã¹') return false;
-
- return true;
- }
-
/**
* 3. 학교에서이다 : 체언 + '에서/부터/에서부터' + '이' + 어미 (PTN_NJCM) <br>
*/