You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/19 23:16:30 UTC
svn commit: r1533835 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko:
dic/ morph/ utils/
Author: rmuir
Date: Sat Oct 19 21:16:29 2013
New Revision: 1533835
URL: http://svn.apache.org/r1533835
Log:
LUCENE-4956: more cleanups and visibility fixes
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sat Oct 19 21:16:29 2013
@@ -167,14 +167,6 @@ public class DictionaryUtil {
return null;
}
- public static WordEntry getAdverb(String key) {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_BUSA)=='1') return entry;
- return null;
- }
-
public static WordEntry getBusa(String key) {
WordEntry entry = getWord(key);
if(entry==null) return null;
@@ -183,31 +175,6 @@ public class DictionaryUtil {
return null;
}
- public static WordEntry getIrrVerb(String key, char irrType) {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_VERB)=='1'&&
- entry.getFeature(WordEntry.IDX_REGURA)==irrType) return entry;
- return null;
- }
-
- public static WordEntry getBeVerb(String key) {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_BEV)=='1') return entry;
- return null;
- }
-
- public static WordEntry getDoVerb(String key) {
- WordEntry entry = getWord(key);
- if(entry==null) return null;
-
- if(entry.getFeature(WordEntry.IDX_DOV)=='1') return entry;
- return null;
- }
-
public static WordEntry getUncompound(String key) {
return uncompounds.get(key);
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java Sat Oct 19 21:16:29 2013
@@ -20,8 +20,6 @@ package org.apache.lucene.analysis.ko.mo
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.analysis.ko.utils.MorphUtil;
-
public class AnalysisOutput implements Cloneable {
public static final int SCORE_CORRECT = 100;
@@ -33,29 +31,20 @@ public class AnalysisOutput implements C
private String source; //분석하기 전 문자열(띄어쓰기 모드에서 사용된다.)
private int score; // score of this result
private int patn; // word pattern
- private char type; // type of input word
private List<CompoundEntry> compound = new ArrayList<CompoundEntry>(); // compound noun of input word
private String stem;
private char pos; // 3 simplified stem type
- private char pos2; // pos attr. for 'pos'
- private char dinf; // pos inf in Han-dic
private String nsfx; // index of noun suffix
private String josa; // josa string
- private List<String> jlist = new ArrayList<String>(); // unit-josa sequence
private String eomi; // Eomi string
private List<String> elist = new ArrayList<String>(); // unit-Eomi sequence
private String pomi; // prefinal Eomi
private String xverb; // Xverb string
private String vsfx; // verb suffix
- private char vtype; // irregular type
private int maxWordLen = 0; // the max length of words within compound nouns
private int dicWordLen = 0; // the sum of the length of words within compound nouns
- public AnalysisOutput() {
- this.score = SCORE_FAIL;
- }
-
public AnalysisOutput(String stem, String josa, String eomi, int patn) {
this.score = SCORE_ANALYSIS;
this.stem=stem;
@@ -80,9 +69,6 @@ public class AnalysisOutput implements C
public void setPatn(int i) {
this.patn = i;
}
- public void setType(char c) {
- this.type = c;
- }
public void setStem(String s) {
this.stem = s;
@@ -93,14 +79,6 @@ public class AnalysisOutput implements C
this.pos = c;
}
- public void setPos2(char c){
- this.pos2 = c;
- }
-
- public void setDinf(char c){
- this.dinf = c;
- }
-
public void setNsfx(String s) {
this.nsfx = s;
}
@@ -109,10 +87,6 @@ public class AnalysisOutput implements C
this.josa = s;
}
- public void addJlist(String l) {
- this.jlist.add(l);
- }
-
public void setEomi(String s){
this.eomi = s;
}
@@ -134,9 +108,6 @@ public class AnalysisOutput implements C
public void setVsfx(String s) {
this.vsfx = s;
}
- public void setVtype(char c) {
- this.vtype = c;
- }
public int getScore() {
return this.score;
@@ -144,31 +115,19 @@ public class AnalysisOutput implements C
public int getPatn() {
return this.patn;
}
-
- public char getType() {
- return this.type;
- }
+
public String getStem() {
return stem;
}
public char getPos() {
return this.pos;
}
- public char getPos2() {
- return this.pos2;
- }
- public char getDinf() {
- return this.dinf;
- }
public String getNsfx() {
return this.nsfx;
}
public String getJosa() {
return this.josa;
}
- public List<String> getJlist() {
- return this.jlist;
- }
public String getEomi() {
return this.eomi;
}
@@ -184,9 +143,6 @@ public class AnalysisOutput implements C
public String getVsfx() {
return this.vsfx;
}
- public char getVtype() {
- return this.vtype;
- }
public int getMaxWordLen() {
return maxWordLen;
@@ -235,98 +191,10 @@ public class AnalysisOutput implements C
}
public AnalysisOutput clone() {
- final AnalysisOutput output;
try {
- output = (AnalysisOutput)super.clone();
+ return (AnalysisOutput)super.clone();
} catch (CloneNotSupportedException cnse) {
throw new AssertionError();
}
- output.setDinf(this.dinf);
- output.setEomi(this.eomi);
- output.setJosa(this.josa);
- output.setNsfx(this.nsfx);
- output.setPatn(this.patn);
- output.setPomi(this.pomi);
- output.setPos(this.pos);
- output.setPos2(this.pos2);
- output.setScore(this.score);
- output.setStem(this.stem);
- output.setType(this.type);
- output.setVsfx(this.vsfx);
- output.setVtype(this.vtype);
- output.setXverb(this.xverb);
-
- return output;
- }
-
- public String toString() {
- StringBuffer buff = new StringBuffer();
-
- buff.append(MorphUtil.buildTypeString(getStem(),getPos()));
- if(getNsfx()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getNsfx(),PatternConstants.POS_SFX_N));
-
- if(getPatn()==PatternConstants.PTN_NJ || getPatn()==PatternConstants.PTN_ADVJ) {
- buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
- }else if(getPatn()==PatternConstants.PTN_NSM) {
- buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
- }else if(getPatn()==PatternConstants.PTN_NSMJ) {
- buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
- }else if(getPatn()==PatternConstants.PTN_NSMXM) {
- buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_COPULA));
- buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
- }else if(getPatn()==PatternConstants.PTN_NJCM) {
- buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_SFX_V));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
- }else if(getPatn()==PatternConstants.PTN_NSMXMJ) {
- buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_COPULA));
- buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
- }else if(getPatn()==PatternConstants.PTN_VM) {
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
- }else if(getPatn()==PatternConstants.PTN_VMJ) {
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
- }else if(getPatn()==PatternConstants.PTN_VMCM) {
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_SFX_N));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
- }else if(getPatn()==PatternConstants.PTN_VMXM) {
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_COPULA));
- buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
- }else if(getPatn()==PatternConstants.PTN_VMXMJ) {
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_COPULA));
- buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
- if(getPomi()!=null)
- buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
- buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
- }
- return buff.toString();
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java Sat Oct 19 21:16:29 2013
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.ko.mo
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import java.util.regex.Pattern;
import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
@@ -29,13 +28,7 @@ import org.apache.lucene.analysis.ko.dic
*/
public class CompoundNounAnalyzer {
- private static int score = 1;
-
private boolean exactMach = true;
-
- private static Pattern NUM_PATTERN = Pattern.compile("^[0-9\\.,]+$");
-
- private static Pattern ALPHANUM_PATTERN = Pattern.compile("^[0-9A-Za-z\\.,]+$");
public boolean isExactMach() {
return exactMach;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/MorphAnalyzer.java Sat Oct 19 21:16:29 2013
@@ -305,13 +305,12 @@ public class MorphAnalyzer {
* @param candidates candidates
*/
public void analysisWithJosa(String stem, String end, List<AnalysisOutput> candidates) {
-
if(stem==null||stem.length()==0) return;
char[] chrs = MorphUtil.decompose(stem.charAt(stem.length()-1));
if(!DictionaryUtil.existJosa(end)||
- (chrs.length==3&&ConstraintUtil.isTwoJosa(end))||
- (chrs.length==2&&(ConstraintUtil.isThreeJosa(end))||"".equals(end))) return; // 연결이 가능한 조사가 아니면...
+ (chrs.length==3 && end.length() == 1 && ConstraintUtil.isTwoJosa(end.charAt(0))) ||
+ (chrs.length==2 && (end.length() == 1 && ConstraintUtil.isThreeJosa(end.charAt(0)))||"".equals(end))) return; // 연결이 가능한 조사가 아니면...
AnalysisOutput output = new AnalysisOutput(stem, end, null, PatternConstants.PTN_NJ);
output.setPos(PatternConstants.POS_NOUN);
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/PatternConstants.java Sat Oct 19 21:16:29 2013
@@ -42,17 +42,6 @@ public interface PatternConstants {
public static int PTN_ZZZ = 35; //* 문장부호, KS 완성형 기호열, 단독 조사/어미 */
-
- /**
- * Definition of sentence types and parts of speech
- */
-
- //* CLASSIFICATION OF SENTENCE PATTERNS */
- public static char SPTN_DECL = 'D'; //* declarative sentence */
- public static char SPTN_QUES = 'Q'; //* question sentence */
- public static char SPTN_IMPR = 'I'; //* imperative sentence */
- public static char SPTN_TITL = 'T'; //* title of a paragraph */
-
//* CLASSIFICATION OF PARTS OF SPEECH */
// 3(basic) + 2(special) types of stem for 'pos'
public static char POS_NPXM = 'N'; //* noun, pnoun, xn, nume */
@@ -103,191 +92,4 @@ public interface PatternConstants {
public static char POS_ETC = 'Z'; //* not decided yet */
- /* ASCII stem may be classified as follows: NOT USED YET */
- public static char POS_ALPHA = 'A'; //* English alphabet */
- public static char POS_NUMBER = '#'; //* Arabic numbers */
- public static char POS_SMARK = 'R'; //* sentence markers */
-
- public static char POS_NVERBK = 'Y'; //* guessed as noun+verb */
-
- public static char POS_SQUOTE = 's'; //* single quotation */
- public static char POS_DQUOTE = 'd'; //* double quotation */
- public static char POS_LPAREN = 'l'; //* left parenthesis */
- public static char POS_RPAREN = 'r'; //* right parenthesis */
-
-
- /**---------------------- ë¶ê·ì¹ ë³í ì í ------------------------ */
- public static char IRR_TYPE_DI = 'd'; //* ã· ë¶ê·ì¹
- public static char IRR_TYPE_BI = 'b'; //* ã
ë¶ê·ì¹
- public static char IRR_TYPE_SI = 's'; //* ã
ë¶ê·ì¹
- public static char IRR_TYPE_HI = 'h'; //* ã
ë¶ê·ì¹
- public static char IRR_TYPE_RO = 'r'; //* ë¬ ë¶ê·ì¹
- public static char IRR_TYPE_LO = 'l'; //* 르 ë¶ê·ì¹
- public static char IRR_TYPE_OU = 'o'; // * ì° ë¶ê·ì¹
- public static char IRR_TYPE_GU = 'g'; // *ê±°ë¼ ë¶ê·ì¹
- public static char IRR_TYPE_NU = 'n'; // * ëë¼ ë¶ê·ì¹
- public static char IRR_TYPE_YO = 'y'; // * ì¬ ë¶ê·ì¹
- public static char IRR_TYPE_LI = 'L'; // * ã¹ íë½
- public static char IRR_TYPE_UO = 'u'; //ì¼ íë½
- public static char IRR_TYPE_AH = 'a'; // ì íë½
- public static char IRR_TYPE_AE = 'e'; // ì ì¶ì½
- public static char IRR_TYPE_WA = 'w'; // ì ì¶ì½
- public static char IRR_TYPE_EI = 'e'; // ì´ ì¶ì½
- public static char IRR_TYPE_OE = 'O'; // ì¸ ì¶ì½
-
-
- /**----------------------- ì¡°ì¬ì ë³ì´ì²´ ì í ------------------------
- *
- *JOSA_VAR_WiAb -- 'ì/ë', 'ì´/ê°', 'ì/를', 'ì/ê³¼', 'ì/ì¼' êµ¬ë¶ ì ë³´
- * 'ë/ê°/를/ì/ì¼'ì¸ ê²½ì°ì ì´ ê°ì´ set.
- *JOSA_VAR_Wz_tal -- 'ë¡/ì¼ë¡' êµ¬ë¶ ì ë³´ (ì) 'íêµë¡' --> 'íêµ'+'ì¼ë¡'
- * 'ì¼'ê° íë½ëì´ ë³µìë ê²½ì°ì ì´ ê°ì´ set.
- *JOSA_VAR_Wi_tal -- ì¡°ì¬ 'ê³ 'ì 'ì´ê³ ', 'ë¼ê³ 'ì 'ì´ë¼ê³ ' êµ¬ë¶ ì ë³´
- * 'ì´'ê° ìëµëì´ ë³µìí ê²½ì°ì ì´ ê°ì´ set.
- *JOSA_VAR_Wg_tal -- ì¡°ì¬ 'ìì'ì 'ì' ìëµ ì ë³´
- * 'ì'ê° ìëµëì´ ë³µìí ê²½ì°ì ì´ ê°ì´ set. 'íêµì' --> 'íêµ'+'ìì'
- *
- *JOSA_VAR_nameWi -- ì¸ëª
ë¤ì ì¡°ìì 'ì´' ì¶ê°ëë ê²½ì°
- * 'ì´'를 ë¶ìê²°ê³¼ìì ìì í ê²½ì°. 'ì¹ì'+'(ì´)ê°'
- * <ì°¸ê³ > ì´ ê²½ì°ë íì JOSA_VAR_WiAb íìì ëë°íë¤.
- *JOSA_VAR_preWi -- ìì 격 ì¡°ì¬ 'ì´' ìì ì¡°ì¬ê° ì¤ë ê²½ì°
- * (ì) 'ìì/ë¶í°/ììë¶í°/ëë¡' + 'ì´' + 'ë¤'
- *JOSA_VAR_preWi2 -- ìì 격 ì¡°ì¬ 'ì´' ìì ì¡°ì¬ & 'ì' íë½ë ê²½ì°
- * (ì) 'íêµìë¤' --> 'íêµ'+'(ì)ì'+'(ì´)'+'ë¤'
- *
- *JOSA_VAR_Ag -- 'ìê²'ì ë³ì´ì²´ 'ê²'
- *JOSA_VAR_Bg -- 'ìê²'ì ë³ì´ì²´ 'ê»'
- *JOSA_VAR_hbDtg -- 'ìê²'ì ë³ì´ì²´ 'íí
'
- *
- *$$$ íì¬, 'ì¹ìì´íí
'ì ê²½ì°ì ì ë³´ê° ë¶ì¶©ë¶í ì ì´ ìì.
- *------------------------- ì¡°ì¬ì ë³ì´ì²´ ì í ------------------------*/
-
- // Values for 'jomi.josa'.
- public static int JOSA_VAR_WiAb =1;
- public static int JOSA_VAR_Wz_tal =2;
- public static int JOSA_VAR_Wi_tal =3;
- public static int JOSA_VAR_Wg_tal =4;
-
- public static int JOSA_VAR_nameWi =5;
- public static int JOSA_VAR_preWi =6;
- public static int JOSA_VAR_preWi2 =7;
-
- //Values for 'jomi.josaAgBg'.
- public static int JOSA_VAR_Ag =1;
- public static int JOSA_VAR_Bg =2;
- public static int JOSA_VAR_hbDtg =3;
-
- /**--------------------- ì´ë§ì´ë¯¸ì ë³ì´ì²´ ì í ----------------------
- *
- *EOMI_VAR_Wb -- 'ì'
- *EOMI_VAR_Wf -- 'ì´'
- *EOMI_VAR_Wj -- 'ì¬' : 'ì¬/ê±°/ë/ë¬/ë¼'
- *EOMI_VAR_Wb_tal -- 'ì' íë½
- *EOMI_VAR_Wf_tal -- 'ì´' íë½
- *EOMI_VAR_b -- 'ã
'
- *EOMI_VAR_f -- 'ã
'
- *EOMI_VAR_j -- 'ã
'
- *EOMI_VAR_c -- 'ã
' ---> 'í´ì', 'ê¹ë§¤ì/íìì' ë± ã
-ë¶ê·ì¹
- *EOMI_VAR_lc -- 'ã
' ---> 'ëë¤'ìë§ ì ì©
- *EOMI_VAR_If, Ib -- 'ë¬' ë¶ê·ì¹ì¸ ê²½ì°
- *
- *EOMI_VAR_Wz_tal -- ì¢
ì± 'ã´/ã¹/ã
/ã
' ë° ì´ì± 'ã¹/ã
'ì¼ë¡ ììëë ì´ë¯¸ìì 'ì¼' íë½
- * <주ì> 'ë/ë/ë'ë¡ ììëë ì´ë¯¸ë¤ì 'ì¼' íë½ì¼ë¡ ê°ì£¼íì§ ìì
- *EOMI_VAR_Uz_tal -- 'ìµëë¤'ìì 'ì¤' íë½
- *
- *EOMI_VAR_xv_Wf -- ë³´ì¡°ì©ì¸ ìì ì¤ë ì´ë¯¸ê° 'ì/ì´'
- *EOMI_VAR_xv_Al -- ë³´ì¡°ì©ì¸ ìì ì¤ë ì´ë¯¸ê° 'ê³ '
- *EOMI_VAR_xv_Ag -- ë³´ì¡°ì©ì¸ ìì ì¤ë ì´ë¯¸ê° 'ê²'
- *
- *EOMI_VAR_Wi_tal -- 무ì¢
ì± ì©ì¸ ë¤ìì ìì 격 ì¡°ì¬ 'ì´' ìëµ
- *
- *$$$ 'ì/ì´'ì ëí ë³ì´ì²´ë ë³´ì¡°ì©ì¸ ìì ì¤ë 'ì/ì´'ìë ì ì©ë¨.
- *
- *----------------------- ì´ë§ì´ë¯¸ì ë³ì´ì²´ ì í ----------------------*/
-
- //Values for 'jomi.eomi' or 'jomi.xomi'.
- public static int EOMI_VAR_Wb =1;
- public static int EOMI_VAR_Wf =2;
- public static int EOMI_VAR_Wj =3;
- public static int EOMI_VAR_Wb_tal =4;
- public static int EOMI_VAR_Wf_tal =5;
- public static int EOMI_VAR_b =6;
- public static int EOMI_VAR_f =7;
- public static int EOMI_VAR_j =8;
- public static int EOMI_VAR_c =9;
- public static int EOMI_VAR_lc =10;
- public static int EOMI_VAR_If =11;
- public static int EOMI_VAR_Ib =12;
-
- public static int EOMI_VAR_Wz_tal =13;
- public static int EOMI_VAR_Uz_tal =14;
-
- public static int EOMI_VAR_Wi_tal =15;
-
- //Values for 'jomi.xomitype'.
- public static int EOMI_VAR_xv_Wf =0; // ì/ì´
- public static int EOMI_VAR_xv_Al =1; // ê³
- public static int EOMI_VAR_xv_Ag =2; // ê²
- public static int EOMI_VAR_xv_Xi =11; // ì§
-
- /**--------------------- ì ì´ë§ì´ë¯¸ì ë³ì´ì²´ ì í ----------------------
- *
- * ì ì´ë§ì´ë¯¸ì ë³ì´ì²´ ì í --- ë ê°ì§ ì 보를 íí
- *
- * 1. 'ì' ìì ì¡°ìì 'ì¼'ê° ì¤ë ê²½ì°
- * 2. 'ì/ì'ì ë³ì´ì²´ ì ë³´
- *
- * POMI_VAR_WbV -- 'ì'
- * POMI_VAR_WfV -- 'ì'
- * POMI_VAR_WjV -- 'ì'
- * POMI_VAR_V -- 'ã
'
- * POMI_VAR_bV -- 'ã
ã
'
- * POMI_VAR_fV -- 'ã
ã
'
- * POMI_VAR_jV -- 'ã
ã
'
- * POMI_VAR_cV -- 'ã
ã
' ---> 'íë¤'ìë§ ì ì©ë¨
- * POMI_VAR_lcV -- 'ã
ã
' ---> 'ëë¤'ìë§ ì ì©ë¨
- * POMI_VAR_IfV -- 'ë ' ---> 'ë¬' ë¶ê·ì¹ì¸ ê²½ì°
- * POMI_VAR_WzUi -- 'ì¼ì' & 'ì'
- * POMI_VAR_WzUjV -- 'ì¼ì' & 'ã
ã
', ì¦ 'ì¼ì
¨'
- *
- *----------------------- ì ì´ë§ì´ë¯¸ì ë³ì´ì²´ ì í ----------------------*/
-
- // Values for 'jomi.pomi'.
- public static int POMI_VAR_WbV =1;
- public static int POMI_VAR_WfV =2;
- public static int POMI_VAR_WjV =3;
- public static int POMI_VAR_V =4;
- public static int POMI_VAR_bV =5;
- public static int POMI_VAR_fV =6;
- public static int POMI_VAR_jV =7;
- public static int POMI_VAR_cV =8;
- public static int POMI_VAR_lcV =9;
- public static int POMI_VAR_IfV =10;
- public static int POMI_VAR_WzUi =11;
- public static int POMI_VAR_WzUjV =12;
-
- /**--------------------- ì¡°ì¬/ì´ë¯¸ ì¸ ê¸°í ì ë³´ ----------------------
- *
- * RMA_RESULT --- the result is got from 'hangul.rma'
- * GUESS_ABBR --- verb stem is guessed as abbr. 'ki/kg/Zi/...'
- *
- * GUESS_CNOUN -- stem is guessed as noun + noun + ...
- * GUESS_PNOUN -- proper noun with Jongsong: articulative 'Wi' dropped.
- *
- * GUESS_NPREF -- noun stem is guessed as prefix 'Gc/Ul' + noun
- * GUESS_VPREF -- verb stem is guessed as prefix 'WbD/QlU' + verb
- * GUESS_NVERB -- verb stem is guessed as noun + verb + ...
- *
- *----------------------- ì¡°ì¬/ì´ë¯¸ ì¸ ê¸°í ì ë³´ ----------------------*/
-
- // Values for 'jomi.zzz'.
- public static int RMA_RESULT =1;
- public static int GUESS_ABBR =2;
-
- public static int GUESS_CNOUN =3;
- public static int GUESS_PNOUN =4;
-
- public static int GUESS_NPREF =5;
- public static int GUESS_VPREF =6;
- public static int GUESS_NVERB =7;
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOuputComparator.java Sat Oct 19 21:16:29 2013
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ko.mo
import java.util.Comparator;
-public class WSOuputComparator implements Comparator<AnalysisOutput> {
+class WSOuputComparator implements Comparator<AnalysisOutput> {
public int compare(AnalysisOutput o1, AnalysisOutput o2) {
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WSOutput.java Sat Oct 19 21:16:29 2013
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ko.mo
import java.util.ArrayList;
import java.util.List;
-public class WSOutput implements Cloneable {
+class WSOutput implements Cloneable {
private int lastStart = 0;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java Sat Oct 19 21:16:29 2013
@@ -574,14 +574,4 @@ public class WordSpaceAnalyzer {
return false;
}
-
- private void printCandidate(WSOutput output) {
-
- List<AnalysisOutput> os = output.getPhrases();
- for(AnalysisOutput o : os) {
- System.out.print(o.toString()+"("+o.getScore()+")| ");
- }
- System.out.println("<==");
-
- }
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/ConstraintUtil.java Sat Oct 19 21:16:29 2013
@@ -17,98 +17,46 @@ package org.apache.lucene.analysis.ko.ut
* limitations under the License.
*/
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.lucene.analysis.ko.morph.PatternConstants;
-
/**
* 결합이 가능한 조건을 처리하는 클래스
*/
public class ConstraintUtil {
private ConstraintUtil() {}
-
- private static Map<String, String> hahes = new HashMap<String, String>(); // "글로벌화해 ", "민족화해" 처럼 화해와 결합이 가능한 명사
- static {
- hahes.put("민족", "Y");hahes.put("ëì", "Y");hahes.put("ë¨ë¶", "Y");
- }
- private static Map<String, String> eomiPnouns = new HashMap<String, String>();
- static {
- eomiPnouns.put("ã´", "Y");eomiPnouns.put("ã¹", "Y");eomiPnouns.put("ã
", "Y");
- }
-
- private static Map<Integer, Integer> PTN_MLIST= new HashMap<Integer, Integer>();
- static {
- PTN_MLIST.put(PatternConstants.PTN_NSM, PatternConstants.PTN_NSM);
- PTN_MLIST.put(PatternConstants.PTN_NSMXM, PatternConstants.PTN_NSMXM);
- PTN_MLIST.put(PatternConstants.PTN_NJCM, PatternConstants.PTN_NJCM);
- PTN_MLIST.put(PatternConstants.PTN_VM, PatternConstants.PTN_VM);
- PTN_MLIST.put(PatternConstants.PTN_VMCM, PatternConstants.PTN_VMCM);
- PTN_MLIST.put(PatternConstants.PTN_VMXM, PatternConstants.PTN_VMXM);
- PTN_MLIST.put(PatternConstants.PTN_NVM, PatternConstants.PTN_NVM);
- }
-
- private static Map<Integer, Integer> PTN_JLIST= new HashMap<Integer, Integer>();
- static {
- PTN_JLIST.put(PatternConstants.PTN_NJ, PatternConstants.PTN_NJ);
- PTN_JLIST.put(PatternConstants.PTN_NSMJ, PatternConstants.PTN_NSMJ);
- PTN_JLIST.put(PatternConstants.PTN_VMJ, PatternConstants.PTN_VMJ);
- PTN_JLIST.put(PatternConstants.PTN_VMXMJ, PatternConstants.PTN_VMXMJ);
- }
-
- private static Map<String, String> WORD_GUKS= new HashMap<String, String>();
- static {
- WORD_GUKS.put("ë ê²", "Y");
- WORD_GUKS.put("ë¤ê²", "Y");
- WORD_GUKS.put("ë³ê²", "Y");
- WORD_GUKS.put("ì°°ê²", "Y");
- WORD_GUKS.put("íê²", "Y");
- WORD_GUKS.put("íìê²", "Y");
+ public static boolean canHaheCompound(String key) {
+ return key.length() == 2 && ("민족".equals(key) || "ëì".equals(key) || "ë¨ë¶".equals(key));
}
// 종성이 있는 음절과 연결될 수 없는 조사
- private static Map<String, String> JOSA_TWO = new HashMap<String, String>();
- static {
- JOSA_TWO.put("ê°", "Y");
- JOSA_TWO.put("ë", "Y");
- JOSA_TWO.put("ë¤", "Y");
- JOSA_TWO.put("ë", "Y");
- JOSA_TWO.put("ë", "Y");
- JOSA_TWO.put("ê³ ", "Y");
- JOSA_TWO.put("ë¼", "Y");
- JOSA_TWO.put("ì", "Y");
- JOSA_TWO.put("ë", "Y");
- JOSA_TWO.put("를", "Y");
- JOSA_TWO.put("ë©°", "Y");
- JOSA_TWO.put("ë ", "Y");
- JOSA_TWO.put("ì¼", "Y");
- JOSA_TWO.put("ì¬", "Y");
+ public static boolean isTwoJosa(char josa) {
+ switch (josa) {
+ case 'ê°':
+ case 'ë':
+ case 'ë¤':
+ case 'ë':
+ case 'ë':
+ case 'ê³ ':
+ case 'ë¼':
+ case 'ì':
+ case 'ë':
+ case '를':
+ case 'ë©°':
+ case 'ë ':
+ case 'ì¼':
+ case 'ì¬': return true;
+ default: return false;
+ }
}
// 종성이 없는 음절과 연결될 수 없는 조사
- private static Map<String, String> JOSA_THREE= new HashMap<String, String>();
- static {
- JOSA_THREE.put("ê³¼", "Y");
- JOSA_THREE.put("ì", "Y");
- JOSA_THREE.put("ì", "Y");
- JOSA_THREE.put("ì¼", "Y");
- JOSA_THREE.put("ì", "Y");
- JOSA_THREE.put("ì", "Y");
- }
-
- public static boolean canHaheCompound(String key) {
- if(hahes.get(key)!=null) return true;
- return false;
- }
-
- public static boolean isTwoJosa(String josa) {
-
- return (JOSA_TWO.get(josa)!=null);
-
- }
- public static boolean isThreeJosa(String josa) {
-
- return (JOSA_THREE.get(josa)!=null);
- }
+ public static boolean isThreeJosa(char josa) {
+ switch (josa) {
+ case 'ê³¼':
+ case 'ì':
+ case 'ì':
+ case 'ì¼':
+ case 'ì': return true;
+ default: return false;
+ }
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/EomiUtil.java Sat Oct 19 21:16:29 2013
@@ -22,11 +22,11 @@ import org.apache.lucene.analysis.ko.dic
public class EomiUtil {
private EomiUtil() {}
- public static final String RESULT_FAIL = "0";
+ static final String RESULT_FAIL = "0";
- public static final String RESULT_SUCCESS = "1";
+ static final String RESULT_SUCCESS = "1";
- public static final String[] verbSuffix = {
+ static final String[] verbSuffix = {
"ì´","í","ë","ì¤ë½","ì¤ë¬ì°","ìí¤","ì","ì","ê°","ë¹í","ë§í","ë리","ë°","ë","ë´"
};
@@ -146,7 +146,7 @@ public class EomiUtil {
results[1] = pomi;
}
- public static boolean IsNLMBSyl(char ech, char lch) {
+ static boolean IsNLMBSyl(char ech, char lch) {
switch(lch) {
case 'ã´' :
return SyllableUtil.hasFeature(ech, SyllableUtil.YNPNA) || SyllableUtil.hasFeature(ech, SyllableUtil.YNPLN);
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java?rev=1533835&r1=1533834&r2=1533835&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/NounUtil.java Sat Oct 19 21:16:29 2013
@@ -29,14 +29,18 @@ import org.apache.lucene.analysis.ko.mor
public class NounUtil {
private NounUtil() {}
- private static final List<String> DNouns = new ArrayList<String>();
-
- static {
- String[] strs = new String[]{"ë±", "ë¤","ì","ê°","ë¿","ë³","ì "};
- for(String str:strs) {
- DNouns.add(str);
+ private static boolean isDNoun(char ch) {
+ switch(ch) {
+ case 'ë±':
+ case 'ë¤':
+ case 'ì':
+ case 'ê°':
+ case 'ë¿':
+ case 'ë³':
+ case 'ì ': return true;
+ default: return false;
}
- };
+ }
/**
*
@@ -52,7 +56,6 @@ public class NounUtil {
if(strlen<2) return false;
char[] chrs = MorphUtil.decompose(o.getStem().charAt(strlen-1));
- boolean success = false;
if(o.getStem().charAt(strlen-1)!='기'&&!(chrs.length==3&&chrs[2]=='ã
')) return false;
@@ -223,7 +226,7 @@ public class NounUtil {
int strlen = output.getStem().length();
String d = output.getStem().substring(strlen-1);
- if(!DNouns.contains(d)) return false;
+ if(d.length() != 1 || !isDNoun(d.charAt(0))) return false;
String s = output.getStem().substring(0, strlen-1);
output.setNsfx(d);