You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 07:52:00 UTC
svn commit: r1534029 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko:
./ morph/
Author: rmuir
Date: Mon Oct 21 05:51:59 2013
New Revision: 1534029
URL: http://svn.apache.org/r1534029
Log:
LUCENE-4956: more morph cleanups
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java Mon Oct 21 05:51:59 2013
@@ -294,7 +294,6 @@ public final class KoreanFilter extends
for(int i=0;i<term.length();i++) {
char[] chs = HanjaMapper.convertToHangul(term.charAt(i));
- if(chs==null) continue;
List<StringBuilder> removeList = new ArrayList<StringBuilder>(); // ì ê±°ë í보를 ì ì¥
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutputComparator.java Mon Oct 21 05:51:59 2013
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ko.mo
import java.util.Comparator;
-public class AnalysisOutputComparator<T> implements Comparator<T> {
+class AnalysisOutputComparator<T> implements Comparator<T> {
public int compare(T o1, T o2) {
AnalysisOutput out1 = (AnalysisOutput)o1;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/ConstraintUtil.java Mon Oct 21 05:51:59 2013
@@ -23,12 +23,12 @@ package org.apache.lucene.analysis.ko.mo
class ConstraintUtil {
private ConstraintUtil() {}
- public static boolean canHaheCompound(String key) {
+ static boolean canHaheCompound(String key) {
return key.length() == 2 && ("민족".equals(key) || "동서".equals(key) || "남북".equals(key));
}
// 종성이 있는 음절과 연결될 수 없는 조사
- public static boolean isTwoJosa(char josa) {
+ static boolean isTwoJosa(char josa) {
switch (josa) {
case '가':
case 'ë':
@@ -49,7 +49,7 @@ class ConstraintUtil {
}
// 종성이 없는 음절과 연결될 수 없는 조사
- public static boolean isThreeJosa(char josa) {
+ static boolean isThreeJosa(char josa) {
switch (josa) {
case '과':
case 'ì':
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java Mon Oct 21 05:51:59 2013
@@ -34,7 +34,7 @@ class EomiUtil {
/**
* 선어말어미를 분석한다.
*/
- public static String[] splitPomi(String stem) {
+ static String[] splitPomi(String stem) {
// results[0]:성공(1)/실패(0), results[1]: 어근, results[2]: 선어말어미
String[] results = new String[2];
@@ -171,7 +171,7 @@ class EomiUtil {
* 4. 어미 '아/어'가 탈락되는 어절
* 5. '아/어'의 변이체 분리
*/
- public static String[] splitEomi(String stem, String end) {
+ static String[] splitEomi(String stem, String end) {
String[] strs = new String[2];
int strlen = stem.length();
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/IrregularUtil.java Mon Oct 21 05:51:59 2013
@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.ko.dic
class IrregularUtil {
private IrregularUtil() {}
- public static String[] restoreIrregularVerb(String start, String end) {
+ static String[] restoreIrregularVerb(String start, String end) {
if(end==null) end="";
char[] jasos = new char[0];
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java Mon Oct 21 05:51:59 2013
@@ -26,21 +26,6 @@ import org.apache.lucene.analysis.ko.dic
import org.apache.lucene.analysis.ko.dic.SyllableFeatures;
public class MorphAnalyzer {
-
- /**
- * starting word of sentence.
- */
- public static final int POS_START = 1;
-
- /**
- * middle word of sentence
- */
- public static final int POS_MID = 2;
-
- /**
- * ending word of sentence.
- */
- public static final int POS_END = 3;
private CompoundNounAnalyzer cnAnalyzer = new CompoundNounAnalyzer();
@@ -52,21 +37,12 @@ public class MorphAnalyzer {
cnAnalyzer.setExactMach(is);
}
- public List<AnalysisOutput> analyze(String input) {
-
- if(input.endsWith("."))
- return analyze(input.substring(0,input.length()-1), POS_END);
-
- return analyze(input, POS_MID);
- }
-
/**
*
* @param input input
- * @param pos pos
* @return candidates
*/
- public List<AnalysisOutput> analyze(String input, int pos) {
+ public List<AnalysisOutput> analyze(String input) {
List<AnalysisOutput> candidates = new ArrayList<AnalysisOutput>();
boolean isVerbOnly = MorphUtil.hasVerbOnly(input);
@@ -267,21 +243,20 @@ public class MorphAnalyzer {
WordEntry entry;
if((entry=DictionaryUtil.getWord(word))!=null) {
- if(entry.getFeature(WordEntry.IDX_NOUN)!='1'&&
- entry.getFeature(WordEntry.IDX_BUSA)=='1') {
+ if (entry.isCompoundNoun()) {
+ candidates.add(0,output);
+ } else if (entry.isNoun()) {
+ output.setScore(AnalysisOutput.SCORE_CORRECT);
+ candidates.add(0,output);
+ } else if (entry.isAdverb()) {
AnalysisOutput busa = new AnalysisOutput(word, null, null, PatternConstants.PTN_AID);
busa.setPos(PatternConstants.POS_ETC);
busa.setScore(AnalysisOutput.SCORE_CORRECT);
candidates.add(0,busa);
- }else if(entry.getFeature(WordEntry.IDX_NOUN)=='1') {
- output.setScore(AnalysisOutput.SCORE_CORRECT);
- candidates.add(0,output);
- }else if(entry.getFeature(WordEntry.IDX_NOUN)=='2') {
- candidates.add(0,output);
}
- if(entry.getFeature(WordEntry.IDX_VERB)!='1') return;
+ if(!entry.isVerb()) return;
} else if(candidates.size()==0||!NounUtil.endsWith2Josa(word)) {
output.setScore(AnalysisOutput.SCORE_ANALYSIS);
candidates.add(0,output);
@@ -298,7 +273,7 @@ public class MorphAnalyzer {
* @param end end
* @param candidates candidates
*/
- public void analysisWithJosa(String stem, String end, List<AnalysisOutput> candidates) {
+ void analysisWithJosa(String stem, String end, List<AnalysisOutput> candidates) {
if(stem==null||stem.length()==0) return;
char[] chrs = MorphUtil.decompose(stem.charAt(stem.length()-1));
@@ -314,7 +289,7 @@ public class MorphAnalyzer {
WordEntry entry = DictionaryUtil.getWordExceptVerb(stem);
if(entry!=null) {
output.setScore(AnalysisOutput.SCORE_CORRECT);
- if(entry.getFeature(WordEntry.IDX_NOUN)=='0'&&entry.getFeature(WordEntry.IDX_BUSA)=='1') {
+ if(!entry.isNoun() && entry.isAdverb()) {
output.setPos(PatternConstants.POS_ETC);
output.setPatn(PatternConstants.PTN_ADVJ);
}
@@ -340,7 +315,7 @@ public class MorphAnalyzer {
* @param end end
* @param candidates candidates
*/
- public void analysisWithEomi(String stem, String end, List<AnalysisOutput> candidates) {
+ void analysisWithEomi(String stem, String end, List<AnalysisOutput> candidates) {
String[] morphs = EomiUtil.splitEomi(stem, end);
if(morphs[0]==null) return; // 어미가 사전에 등록되어 있지 않다면....
@@ -396,7 +371,7 @@ public class MorphAnalyzer {
* 복합명사인지 여부는 단위명사가 모두 사전에 있는지 여부로 판단한다.
* 단위명사는 2글자 이상 단어에서만 찾는다.
*/
- public boolean confirmCNoun(AnalysisOutput o) {
+ boolean confirmCNoun(AnalysisOutput o) {
if(o.getStem().length()<3) return false;
@@ -455,7 +430,7 @@ public class MorphAnalyzer {
}else if(o.getPatn()==PatternConstants.PTN_NSM) {
if("내".equals(o.getVsfx())&&cnouns.get(cnouns.size()-1).getWord().length()!=1) {
WordEntry entry = DictionaryUtil.getWord(cnouns.get(cnouns.size()-1).getWord());
- if(entry!=null&&entry.getFeature(WordEntry.IDX_NE)=='0') return false;
+ if(entry!=null && !entry.hasNE()) return false;
// }else if("í".equals(o.getVsfx())&&cnouns.get(cnouns.size()-1).getWord().length()==1) {
// // ì§ì¬ëíë¤ ì ê°ì ê²½ì°ì ë·ê¸ìê° 1ê¸ìì´ë©´ ì ì¸
// return false;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphUtil.java Mon Oct 21 05:51:59 2013
@@ -48,7 +48,7 @@ class MorphUtil {
* íê¸ íê¸ì를 ì´ì±/ì¤ì±/ì¢
ì±ì ë°°ì´ë¡ ë§ë¤ì´ ë°ííë¤.
* @param c the character to be decomposed
*/
- public static char[] decompose(char c) {
+ static char[] decompose(char c) {
char[] result = null;
if(c>0xD7A3||c<0xAC00) return new char[]{c};
@@ -70,18 +70,17 @@ class MorphUtil {
return result;
}
- public static char compound(int first, int middle, int last) {
+ static char compound(int first, int middle, int last) {
return (char)(0xAC00 + first* JUNG_JONG + middle * JONGSEONG.length + last);
}
-
- public static char makeChar(char ch, int mdl, int last) {
+ static char makeChar(char ch, int mdl, int last) {
ch -= 0xAC00;
int first = ch/JUNG_JONG;
return compound(first,mdl,last);
}
- public static char makeChar(char ch, int last) {
+ static char makeChar(char ch, int last) {
ch -= 0xAC00;
int first = ch/JUNG_JONG;
ch = (char)(ch % JUNG_JONG);
@@ -90,30 +89,14 @@ class MorphUtil {
return compound(first,middle,last);
}
- public static char replaceJongsung(char dest, char source) {
+ static char replaceJongsung(char dest, char source) {
source -= 0xAC00;
int last = source % JONGSEONG.length;
return makeChar(dest,last);
}
-
- /**
- * ííì ì í ì¶ë ¥ì ìí 문ìì´ì ìì±íë¤.
- * @param word word to be printed
- * @param type the type of the input word
- */
- public static String buildTypeString(String word, char type) {
- StringBuffer sb = new StringBuffer();
- sb.append(word);
- sb.append("(");
- sb.append(type);
- sb.append(")");
-
- return sb.toString();
- }
-
- public static void buildPtnVM(AnalysisOutput output, List<AnalysisOutput> candidates) {
+ static void buildPtnVM(AnalysisOutput output, List<AnalysisOutput> candidates) {
String end = output.getEomi();
if(output.getPomi()!=null) end = output.getPomi();
@@ -134,7 +117,7 @@ class MorphUtil {
}
- public static boolean hasVerbOnly(String input) {
+ static boolean hasVerbOnly(String input) {
for (int i = input.length()-1; i >=0; i--) {
if (SyllableFeatures.hasFeature(input.charAt(i), SyllableFeatures.WDSURF)) {
assert input.length() > i;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java Mon Oct 21 05:51:59 2013
@@ -46,7 +46,7 @@ class NounUtil {
* @param o the analyzed output
* @param candidates candidates
*/
- public static boolean analysisMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ static boolean analysisMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
int strlen = o.getStem().length();
@@ -94,7 +94,7 @@ class NounUtil {
* @param o the analyzed output
* @param candidates candidates
*/
- public static boolean analysisVMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ private static boolean analysisVMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
String[] irrs = IrregularUtil.restoreIrregularVerb(o.getStem(), o.getElist().get(0));
if(irrs!=null) {
@@ -118,7 +118,7 @@ class NounUtil {
* @param o the analyzed output
* @param candidates candidates
*/
- public static boolean analysisVMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ private static boolean analysisVMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
@@ -162,7 +162,7 @@ class NounUtil {
* @param o the analyzed output
* @param candidates candidates
*/
- public static boolean analysisNSMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ private static boolean analysisNSMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
int idxVbSfix = VerbUtil.endsWithVerbSuffix(o.getStem());
if(idxVbSfix==-1) return false;
@@ -175,10 +175,10 @@ class NounUtil {
WordEntry entry = DictionaryUtil.getWordExceptVerb(o.getStem());
if(entry!=null) {
- if(entry.getFeature(WordEntry.IDX_NOUN)=='0') return false;
- else if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
- else if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
- else if(o.getVsfx().equals("내")&&entry.getFeature(WordEntry.IDX_NE)!='1') return false;
+ if(!entry.isNoun()) return false;
+ else if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+ else if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;
+ else if(o.getVsfx().equals("내") && !entry.hasNE()) return false;
o.setScore(AnalysisOutput.SCORE_CORRECT); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.
}else {
o.setScore(AnalysisOutput.SCORE_ANALYSIS); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.
@@ -189,7 +189,7 @@ class NounUtil {
return true;
}
- public static boolean analysisNSMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ private static boolean analysisNSMXMJ(AnalysisOutput o, List<AnalysisOutput> candidates) {
int idxVbSfix = VerbUtil.endsWithVerbSuffix(o.getStem());
if(idxVbSfix==-1) return false;
@@ -202,10 +202,10 @@ class NounUtil {
WordEntry entry = DictionaryUtil.getWordExceptVerb(o.getStem());
if(entry!=null) {
- if(entry.getFeature(WordEntry.IDX_NOUN)=='0') return false;
- else if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
- else if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
- else if(o.getVsfx().equals("내")&&entry.getFeature(WordEntry.IDX_NE)!='1') return false;
+ if(!entry.isNoun()) return false;
+ else if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+ else if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;
+ else if(o.getVsfx().equals("내") && !entry.hasNE()) return false;
o.setScore(AnalysisOutput.SCORE_CORRECT); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.
}else {
o.setScore(AnalysisOutput.SCORE_ANALYSIS); // '입니다'인 경우 인명 등 미등록어가 많이 발생되므로 분석성공으로 가정한다.
@@ -219,7 +219,7 @@ class NounUtil {
/*
* ë§ì§ë§ ìì ì´ ëª
ì¬í ì 미ì¬(ë±,ì..)ì¸ì§ ì¡°ì¬íë¤.
*/
- public static boolean confirmDNoun(AnalysisOutput output) {
+ static boolean confirmDNoun(AnalysisOutput output) {
int strlen = output.getStem().length();
String d = output.getStem().substring(strlen-1);
@@ -241,7 +241,7 @@ class NounUtil {
return true;
}
- public static boolean endsWith2Josa(String input) {
+ static boolean endsWith2Josa(String input) {
for (int i = input.length()-2; i > 0; i--) {
String josa = input.substring(i);
@@ -254,10 +254,12 @@ class NounUtil {
return false;
}
- public static double countFoundNouns(AnalysisOutput o) {
+ static double countFoundNouns(AnalysisOutput o) {
int count = 0;
- for(int i=0;i<o.getCNounList().size();i++) {
- if(o.getCNounList().get(i).isExist()) count++;
+ for (CompoundEntry entry : o.getCNounList()) {
+ if (entry.isExist()) {
+ count++;
+ }
}
return (count*100)/o.getCNounList().size();
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/VerbUtil.java Mon Oct 21 05:51:59 2013
@@ -26,11 +26,11 @@ import org.apache.lucene.analysis.ko.dic
class VerbUtil {
private VerbUtil() {}
- public static final Map<String, String> verbSuffix = new HashMap<String, String>();
+ private static final Map<String, String> verbSuffix = new HashMap<String, String>();
- public static final Map<String, String> XVerb = new HashMap<String, String>();
+ private static final Map<String, String> XVerb = new HashMap<String, String>();
- public static final Map<String, String> wiAbbrevs = new HashMap<String, String>();
+ private static final Map<String, String> wiAbbrevs = new HashMap<String, String>();
static {
String[] suffixs = {
@@ -47,7 +47,7 @@ class VerbUtil {
/**
* ì´ê°ì´ ì©ì¸íì 미ì¬ë¡ ëëë©´ index 를 ë°ííë¤. ìëë©´ -1ì ë°ííë¤.
*/
- public static int endsWithVerbSuffix(String stem) {
+ static int endsWithVerbSuffix(String stem) {
int len = stem.length();
if(len<2) return -1;
int start = 2;
@@ -61,7 +61,7 @@ class VerbUtil {
/**
* ì´ê°ë¶ì ë³´ì¡°ì©ì¸ [í,ë,ì¤,ë´,주,ì§]ê° ìëì§ ì¡°ì¬íë¤.
*/
- public static int endsWithXVerb(String stem) {
+ static int endsWithXVerb(String stem) {
int len = stem.length();
if(len<2) return -1;
int start = 2;
@@ -72,16 +72,14 @@ class VerbUtil {
return -1;
}
- public static boolean verbSuffix(String stem) {
-
+ static boolean verbSuffix(String stem) {
return verbSuffix.get(stem)!=null;
-
}
/**
* 3. íêµììì´ë¤ : ì²´ì¸ + 'ìì/ë¶í°/ììë¶í°' + 'ì´' + ì´ë¯¸ (PTN_NJCM) <br>
*/
- public static boolean ananlysisNJCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ static boolean ananlysisNJCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
int strlen = o.getStem().length();
boolean success = false;
@@ -119,7 +117,7 @@ class VerbUtil {
* @param o ì´ë¯¸ë¶ì ì´ê°ë¶ê° ë¶ë¦¬ë ê²°ê³¼
* @param candidates candidates
*/
- public static boolean ananlysisNSM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ static boolean ananlysisNSM(AnalysisOutput o, List<AnalysisOutput> candidates) {
if(o.getStem().endsWith("ì¤ë¬ì°")) o.setStem(o.getStem().substring(0,o.getStem().length()-3)+"ì¤ë½");
@@ -132,7 +130,6 @@ class VerbUtil {
o.setStem(o.getStem().substring(0,idxVbSfix));
entry = DictionaryUtil.getAllNoun(o.getStem());
} else { // ì´ ì¶ì½ì¸ ê²½ì°
- if(entry==null) return false;
o.setVsfx("ì´");
o.setStem(o.getStem());
}
@@ -141,10 +138,10 @@ class VerbUtil {
o.setPos(PatternConstants.POS_NOUN);
if(entry!=null) {
- if(entry.getFeature(WordEntry.IDX_NOUN)=='0') return false;
- else if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
- else if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
- else if(o.getVsfx().equals("내")&&entry.getFeature(WordEntry.IDX_NE)!='1') return false;
+ if(!entry.isNoun()) return false;
+ else if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+ else if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;
+ else if(o.getVsfx().equals("내") && !entry.hasNE()) return false;
else if(o.getVsfx().equals("ì´")&&o.getEomi().equals("ì´")) return false;
o.setScore(AnalysisOutput.SCORE_CORRECT); // 'ì
ëë¤'ì¸ ê²½ì° ì¸ëª
ë± ë¯¸ë±ë¡ì´ê° ë§ì´ ë°ìëë¯ë¡ ë¶ìì±ê³µì¼ë¡ ê°ì íë¤.
}else {
@@ -157,7 +154,7 @@ class VerbUtil {
}
- public static boolean ananlysisNSMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ static boolean ananlysisNSMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
if(idxXVerb==-1) return false;
@@ -187,8 +184,8 @@ class VerbUtil {
// }
if(entry==null) return false;
- if(o.getVsfx().equals("하")&&entry.getFeature(WordEntry.IDX_DOV)!='1') return false;
- if(o.getVsfx().equals("되")&&entry.getFeature(WordEntry.IDX_BEV)!='1') return false;
+ if(o.getVsfx().equals("하") && !entry.hasDOV()) return false;
+ if(o.getVsfx().equals("되") && !entry.hasBEV()) return false;
o.setScore(AnalysisOutput.SCORE_CORRECT);
if(entry.isCompoundNoun()) {
@@ -200,7 +197,7 @@ class VerbUtil {
return (o.getScore()==AnalysisOutput.SCORE_CORRECT);
}
- public static boolean analysisVMCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ static boolean analysisVMCM(AnalysisOutput o, List<AnalysisOutput> candidates) {
int strlen = o.getStem().length();
@@ -248,7 +245,7 @@ class VerbUtil {
* 6. ëìì£¼ë¤ : ì©ì¸ + 'ì/ì´' + ë³´ì¡°ì©ì¸ + ì´ë¯¸ (PTN_VMXM)
*
*/
- public static boolean analysisVMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
+ static boolean analysisVMXM(AnalysisOutput o, List<AnalysisOutput> candidates) {
int idxXVerb = VerbUtil.endsWithXVerb(o.getStem());
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java Mon Oct 21 05:51:59 2013
@@ -28,36 +28,28 @@ class WSOutput implements Cloneable {
private List<AnalysisOutput> phrases = new ArrayList<AnalysisOutput>();
- public WSOutput() {
-
- }
-
- public WSOutput(AnalysisOutput o) {
- addPhrase(o);
- }
-
- public int getLastStart() {
+ int getLastStart() {
return lastStart;
}
- public void setLastStart(int start) {
+ void setLastStart(int start) {
this.lastStart = start;
}
- public int getLastEnd() {
+ int getLastEnd() {
return lastEnd;
}
- public void setLastEnd(int end) {
+ void setLastEnd(int end) {
this.lastStart = end;
}
- public List<AnalysisOutput> getPhrases() {
+ List<AnalysisOutput> getPhrases() {
return phrases;
}
- public void removeLast() {
+ void removeLast() {
if(this.phrases.size()==0) return;
@@ -81,7 +73,7 @@ class WSOutput implements Cloneable {
}
}
- public void addPhrase(AnalysisOutput o) {
+ void addPhrase(AnalysisOutput o) {
this.lastStart = this.lastEnd;
this.lastEnd += o.getSource().length();
@@ -92,7 +84,7 @@ class WSOutput implements Cloneable {
addCompounds(o);
}
- private void addCompounds(AnalysisOutput o) {
+ void addCompounds(AnalysisOutput o) {
List<CompoundEntry> cnouns = o.getCNounList();
@@ -151,7 +143,7 @@ class WSOutput implements Cloneable {
}
- public void setPhrases(List<AnalysisOutput> phrases) {
+ void setPhrases(List<AnalysisOutput> phrases) {
this.phrases = phrases;
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java Mon Oct 21 05:51:59 2013
@@ -22,13 +22,13 @@ import java.util.List;
public class WordEntry {
- static final int IDX_NOUN = 0;
- static final int IDX_VERB = 1;
- static final int IDX_BUSA = 2;
- public static final int IDX_DOV = 3;
- public static final int IDX_BEV = 4;
- public static final int IDX_NE = 5;
- static final int IDX_REGURA = 9;
+ private static final int IDX_NOUN = 0;
+ private static final int IDX_VERB = 1;
+ private static final int IDX_BUSA = 2;
+ private static final int IDX_DOV = 3;
+ private static final int IDX_BEV = 4;
+ private static final int IDX_NE = 5;
+ private static final int IDX_REGURA = 9;
/** Irregular verb type (ã
-final) */
public static final int VERB_TYPE_BIUP = 'B';
@@ -80,11 +80,7 @@ public class WordEntry {
}
public String getWord() {
- return this.word;
- }
-
- public char getFeature(int index) {
- return features[index];
+ return word;
}
/** Returns true if the entry is a noun (or compound noun) */
@@ -117,4 +113,19 @@ public class WordEntry {
public boolean isAdverb() {
return features[IDX_BUSA] == '1';
}
+
+ /** allows noun analysis with -하 verb suffix */
+ public boolean hasDOV() {
+ return features[IDX_DOV] == '1';
+ }
+
+ /** allows noun analysis with -되 verb suffix */
+ public boolean hasBEV() {
+ return features[IDX_BEV] == '1';
+ }
+
+ /** allows noun analysis with -내 verb suffix */
+ public boolean hasNE() {
+ return features[IDX_NE] == '1';
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java?rev=1534029&r1=1534028&r2=1534029&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java Mon Oct 21 05:51:59 2013
@@ -132,12 +132,10 @@ public class WordSpaceAnalyzer {
* @return calculated score
*/
public int getOutputScore(List<AnalysisOutput> list) {
-
int score = 100;
- for(AnalysisOutput o : list) {
- if(o.getScore()<score) score = o.getScore();
+ for (AnalysisOutput o : list) {
+ score = Math.min(score, o.getScore());
}
-
return score;
}
@@ -499,7 +497,7 @@ public class WordSpaceAnalyzer {
int ptn = PatternConstants.PTN_N;
- if(entry.getFeature(WordEntry.IDX_NOUN)=='0') {
+ if(!entry.isNoun()) {
pos = PatternConstants.POS_AID;
ptn = PatternConstants.PTN_AID;
}