You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/10/16 14:02:48 UTC
svn commit: r1532739 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko:
morph/AnalysisOutput.java morph/CompoundNounAnalyzer.java
utils/IrregularUtil.java utils/Utilities.java
Author: uschindler
Date: Wed Oct 16 12:02:48 2013
New Revision: 1532739
URL: http://svn.apache.org/r1532739
Log:
LUCENE-4956: More obsolete stuff (not even used), some moves to classes where code parts are solely used
Removed:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/Utilities.java
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/IrregularUtil.java
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java?rev=1532739&r1=1532738&r2=1532739&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/AnalysisOutput.java Wed Oct 16 12:02:48 2013
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ko.mo
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.analysis.ko.utils.Utilities;
+import org.apache.lucene.analysis.ko.utils.MorphUtil;
public class AnalysisOutput implements Cloneable {
@@ -38,7 +38,7 @@ public class AnalysisOutput implements C
private String stem;
private char pos; // 3 simplified stem type
private char pos2; // pos attr. for 'pos'
- private char dinf; // pos info. in Han-dic
+ private char dinf; // pos inf in Han-dic
private String nsfx; // index of noun suffix
private String josa; // josa string
private List<String> jlist = new ArrayList<String>(); // unit-josa sequence
@@ -256,6 +256,73 @@ public class AnalysisOutput implements C
}
public String toString() {
- return Utilities.buildOutputString(this);
+ StringBuffer buff = new StringBuffer();
+
+ buff.append(MorphUtil.buildTypeString(getStem(),getPos()));
+ if(getNsfx()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getNsfx(),PatternConstants.POS_SFX_N));
+
+ if(getPatn()==PatternConstants.PTN_NJ || getPatn()==PatternConstants.PTN_ADVJ) {
+ buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
+ }else if(getPatn()==PatternConstants.PTN_NSM) {
+ buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
+ }else if(getPatn()==PatternConstants.PTN_NSMJ) {
+ buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
+ }else if(getPatn()==PatternConstants.PTN_NSMXM) {
+ buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_COPULA));
+ buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
+ }else if(getPatn()==PatternConstants.PTN_NJCM) {
+ buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_SFX_V));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
+ }else if(getPatn()==PatternConstants.PTN_NSMXMJ) {
+ buff.append(",").append(MorphUtil.buildTypeString(getVsfx(),PatternConstants.POS_SFX_V));
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_COPULA));
+ buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
+ }else if(getPatn()==PatternConstants.PTN_VM) {
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
+ }else if(getPatn()==PatternConstants.PTN_VMJ) {
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
+ }else if(getPatn()==PatternConstants.PTN_VMCM) {
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_SFX_N));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
+ }else if(getPatn()==PatternConstants.PTN_VMXM) {
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_COPULA));
+ buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getEomi(),PatternConstants.POS_EOMI));
+ }else if(getPatn()==PatternConstants.PTN_VMXMJ) {
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(1),PatternConstants.POS_COPULA));
+ buff.append(",").append(MorphUtil.buildTypeString(getXverb(),PatternConstants.POS_XVERB));
+ if(getPomi()!=null)
+ buff.append(",").append(MorphUtil.buildTypeString(getPomi(),PatternConstants.POS_PEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getElist().get(0),PatternConstants.POS_NEOMI));
+ buff.append(",").append(MorphUtil.buildTypeString(getJosa(),PatternConstants.POS_JOSA));
+ }
+ return buff.toString();
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java?rev=1532739&r1=1532738&r2=1532739&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/CompoundNounAnalyzer.java Wed Oct 16 12:02:48 2013
@@ -23,7 +23,6 @@ import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.ko.utils.Utilities;
import org.apache.lucene.analysis.ko.dic.DictionaryUtil;
import org.apache.lucene.analysis.ko.morph.CompoundEntry;
import org.apache.lucene.analysis.ko.morph.MorphException;
@@ -456,10 +455,23 @@ public class CompoundNounAnalyzer {
return new CompoundEntry(input, 0, score==AnalysisOutput.SCORE_CORRECT,pos);
}
+ private static boolean isAlphaNumeric(String text) {
+
+ for(int i=0;i<text.length();i++)
+ {
+ int c = text.charAt(i);
+ if((c>=48 && c<=57) || (c>=65 && c<=90) || (c>=97 && c<=122)) {
+ continue;
+ }
+ return false;
+ }
+ return true;
+ }
+
private boolean validCompound(String before, String after, boolean isFirst, int pos) throws MorphException {
if(pos==1&&before.length()==1&&
- (!isFirst||!(DictionaryUtil.existPrefix(before)||Utilities.isAlphaNumeric(before)))) return false;
+ (!isFirst||!(DictionaryUtil.existPrefix(before) || isAlphaNumeric(before)))) return false;
if(after.length()==1&&!isFirst&&!DictionaryUtil.existSuffix(after)) return false;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/IrregularUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/IrregularUtil.java?rev=1532739&r1=1532738&r2=1532739&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/IrregularUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/utils/IrregularUtil.java Wed Oct 16 12:02:48 2013
@@ -149,7 +149,7 @@ public class IrregularUtil {
ch = MorphUtil.makeChar(ch, 17);
if(start.length()>2)
- start = Utilities.arrayToString(new String[]{start.substring(0,start.length()-2),Character.toString(ch)});
+ start = arrayToString(new String[]{start.substring(0,start.length()-2),Character.toString(ch)});
else
start = Character.toString(ch);
@@ -161,6 +161,14 @@ public class IrregularUtil {
return null;
}
+ private static String arrayToString(String[] strs) {
+ StringBuffer sb = new StringBuffer();
+ for(String str:strs) {
+ sb.append(str);
+ }
+ return sb.toString();
+ }
+
/**
* ㄷ 불규칙 원형을 복원한다. (깨닫다, 묻다)
* @param start start text
@@ -176,7 +184,7 @@ public class IrregularUtil {
ch = MorphUtil.makeChar(ch, 7);
if(start.length()>1)
- start = Utilities.arrayToString(new String[]{start.substring(0,start.length()-1),Character.toString(ch)});
+ start = arrayToString(new String[]{start.substring(0,start.length()-1),Character.toString(ch)});
else
start = Character.toString(ch);