You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/27 19:15:44 UTC
svn commit: r1536174 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko:
dic/DictionaryUtil.java morph/EomiUtil.java morph/NounUtil.java
Author: rmuir
Date: Sun Oct 27 18:15:44 2013
New Revision: 1536174
URL: http://svn.apache.org/r1536174
Log:
LUCENE-4956: move this out of dictionaryutil
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1536174&r1=1536173&r2=1536174&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sun Oct 27 18:15:44 2013
@@ -203,25 +203,6 @@ public class DictionaryUtil {
return suffixs.contains(str);
}
- /**
- * ㄴ,ㄹ,ㅁ,ㅂ과 eomi 가 결합하여 어미가 될 수 있는지 점검한다.
- */
- public static String combineAndEomiCheck(char s, String eomi) {
-
- if(eomi==null) eomi="";
-
- if(s=='ã´') eomi = "ì"+eomi;
- else if(s=='ã¹') eomi = "ì"+eomi;
- else if(s=='ã
') eomi = "ì"+eomi;
- else if(s=='ã
') eomi = "ìµ"+eomi;
- else eomi = s+eomi;
-
- if(existEomi(eomi)) return eomi;
-
- return null;
-
- }
-
private static void readFileToSet(final Set<String> set, String dic) throws IOException {
DictionaryResources.readLines(dic, new LineProcessor() {
@Override
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java?rev=1536174&r1=1536173&r2=1536174&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java Sun Oct 27 18:15:44 2013
@@ -184,7 +184,7 @@ class EomiUtil {
if((chrs.length==3)
&&(chrs[2]=='ã´'||chrs[2]=='ã¹'||chrs[2]=='ã
'||chrs[2]=='ã
')
&&EomiUtil.IsNLMBSyl(estem,chrs[2])
- && DictionaryUtil.combineAndEomiCheck(chrs[2], end)!=null)
+ && combineAndEomiCheck(chrs[2], end)!=null)
{
strs[1] = Character.toString(chrs[2]);
if(end.length()>0) strs[1] += end;
@@ -192,7 +192,7 @@ class EomiUtil {
strs[0] = stem.substring(0,strlen-1) + MorphUtil.makeChar(estem, 0);
}
else if(chrs.length==3 && chrs[2]=='ã¹' && DictionaryUtil.getVerb(stem)!=null
- && DictionaryUtil.combineAndEomiCheck(chrs[2], end)!=null)
+ && combineAndEomiCheck(chrs[2], end)!=null)
{
strs[1] = Character.toString(chrs[2]);
if(end.length()>0) strs[1] += end;
@@ -211,27 +211,27 @@ class EomiUtil {
else if(chrs[0]!='ã
'&&
(chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
')&&
(chrs.length==2 || SyllableFeatures.hasFeature(estem, SyllableFeatures.YNPAH)) &&
- (DictionaryUtil.combineAndEomiCheck('ì´', end)!=null))
+ (combineAndEomiCheck('ì´', end)!=null))
{
strs[0] = stem;
if(chrs.length==2) strs[1] = "ì´"+end;
else strs[1] = end;
}
else if(estem=='í'&&end!=null&&end.startsWith("ì¬")&&
- DictionaryUtil.combineAndEomiCheck('ì´', end.substring(1))!=null)
+ combineAndEomiCheck('ì´', end.substring(1))!=null)
{
strs[0] = stem;
strs[1] = "ì´"+end.substring(1);
}
else if(estem=='ë ¤'&&end!=null&& // êº¼ë ¤=>꺼리ì´, êº¼ë ¤ì=>꺼리ì´ì
- DictionaryUtil.combineAndEomiCheck('ì´', end)!=null)
+ combineAndEomiCheck('ì´', end)!=null)
{
strs[0] = stem.substring(0,stem.length()-1)+"리";
strs[1] = "ì´"+end;
}
else if((chrs.length==2)&&
(chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
'||chrs[1]=='ã
')&&
- (DictionaryUtil.combineAndEomiCheck('ì´', end)!=null))
+ (combineAndEomiCheck('ì´', end)!=null))
{
StringBuffer sb = new StringBuffer();
@@ -265,4 +265,32 @@ class EomiUtil {
return strs;
}
+
+ /**
+ * ㄴ,ㄹ,ㅁ,ㅂ과 eomi 가 결합하여 어미가 될 수 있는지 점검한다.
+ */
+ private static String combineAndEomiCheck(char s, String eomi) {
+
+ if (eomi == null) {
+ eomi = "";
+ }
+
+ switch(s) {
+ case 'ã´': eomi = "ì" + eomi;
+ break;
+ case 'ã¹': eomi = "ì" + eomi;
+ break;
+ case 'ã
': eomi = "ì" + eomi;
+ break;
+ case 'ã
': eomi = "ìµ" + eomi;
+ break;
+ default: eomi = s + eomi;
+ }
+
+ if (DictionaryUtil.existEomi(eomi)) {
+ return eomi;
+ } else {
+ return null;
+ }
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java?rev=1536174&r1=1536173&r2=1536174&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/NounUtil.java Sun Oct 27 18:15:44 2013
@@ -27,19 +27,6 @@ import org.apache.lucene.analysis.ko.dic
class NounUtil {
private NounUtil() {}
-
- private static boolean isDNoun(char ch) {
- switch(ch) {
- case 'ë±':
- case 'ë¤':
- case 'ì':
- case 'ê°':
- case 'ë¿':
- case 'ë³':
- case 'ì ': return true;
- default: return false;
- }
- }
/**
*
@@ -217,21 +204,42 @@ class NounUtil {
return true;
}
+
+ private static boolean isDNoun(char ch) {
+ switch(ch) {
+ case 'ë±':
+ case 'ë¤':
+ case 'ì':
+ case 'ê°':
+ case 'ë¿':
+ case 'ë³':
+ case 'ì ': return true;
+ default: return false;
+ }
+ }
/*
* 마지막 음절이 명사형 접미사(등,상..)인지 조사한다.
*/
static boolean confirmDNoun(AnalysisOutput output) {
-
- int strlen = output.getStem().length();
- String d = output.getStem().substring(strlen-1);
- if(d.length() != 1 || !isDNoun(d.charAt(0))) return false;
-
- String s = output.getStem().substring(0, strlen-1);
- output.setNsfx(d);
- output.setStem(s);
+ final String currentStem = output.getStem();
+ // empty or single character
+ if (currentStem.length() <= 1) {
+ return false;
+ }
+
+ // check suffix char
+ final char suffix = currentStem.charAt(currentStem.length()-1);
+ if (!isDNoun(suffix)) {
+ return false;
+ }
+
+ // remove suffix
+ String stem = currentStem.substring(0, currentStem.length()-1);
+ output.setNsfx(Character.toString(suffix));
+ output.setStem(stem);
- WordEntry cnoun = DictionaryUtil.getAllNoun(s);
+ WordEntry cnoun = DictionaryUtil.getAllNoun(stem);
if(cnoun != null) {
if(cnoun.isCompoundNoun())
output.setCNoun(cnoun.getCompounds());