You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/27 19:50:55 UTC
svn commit: r1536184 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko:
dic/DictionaryUtil.java dic/HangulDictionary.java morph/EomiUtil.java
morph/WordSpaceAnalyzer.java
Author: rmuir
Date: Sun Oct 27 18:50:54 2013
New Revision: 1536184
URL: http://svn.apache.org/r1536184
Log:
LUCENE-4956: replace some getWord != null with hasWord
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sun Oct 27 18:50:54 2013
@@ -83,6 +83,11 @@ public class DictionaryUtil {
}
}
+ /** true if this word exists */
+ public static boolean hasWord(CharSequence key) {
+ return dictionary.lookup(key) != null;
+ }
+
/** true if something with this prefix exists */
public static boolean hasWordPrefix(CharSequence prefix) {
return dictionary.hasPrefix(prefix);
@@ -98,78 +103,43 @@ public class DictionaryUtil {
}
}
- /** Looks up noun, compound noun, or adverb */
- public static WordEntry getWordExceptVerb(String key) {
+ /** returns word (or null) matching specified features */
+ private static WordEntry getWord(String key, int on, int off) {
Byte clazz = dictionary.lookup(key);
if (clazz == null) {
return null;
}
char flags = dictionary.getFlags(clazz);
- if ((flags & (WordEntry.NOUN | WordEntry.BUSA)) != 0) {
+ if ((flags & on) != 0 && (flags & off) == 0) {
return new WordEntry(key, flags, clazz);
} else {
return null;
}
}
+ /** Looks up noun, compound noun, or adverb */
+ public static WordEntry getWordExceptVerb(String key) {
+ return getWord(key, WordEntry.NOUN | WordEntry.BUSA, 0);
+ }
+
/** Looks up a noun (but not compound noun) */
public static WordEntry getNoun(String key) {
- Byte clazz = dictionary.lookup(key);
- if (clazz == null) {
- return null;
- }
- char flags = dictionary.getFlags(clazz);
- if ((flags & WordEntry.NOUN) != 0 && (flags & WordEntry.COMPOUND) == 0) {
- return new WordEntry(key, flags, clazz);
- } else {
- return null;
- }
+ return getWord(key, WordEntry.NOUN, WordEntry.COMPOUND);
}
- /**
- * return all noun including compound noun
- */
+ /** return all noun including compound noun */
public static WordEntry getAllNoun(String key) {
- Byte clazz = dictionary.lookup(key);
- if (clazz == null) {
- return null;
- }
- char flags = dictionary.getFlags(clazz);
- if ((flags & WordEntry.NOUN) != 0) {
- return new WordEntry(key, flags, clazz);
- } else {
- return null;
- }
+ return getWord(key, WordEntry.NOUN, 0);
}
- /**
- * returns any verb
- */
+ /** returns any verb */
public static WordEntry getVerb(String key) {
- Byte clazz = dictionary.lookup(key);
- if (clazz == null) {
- return null;
- }
- char flags = dictionary.getFlags(clazz);
- if ((flags & WordEntry.VERB) != 0) {
- return new WordEntry(key, flags, clazz);
- } else {
- return null;
- }
+ return getWord(key, WordEntry.VERB, 0);
}
/** Looks up an adverb-only */
public static WordEntry getBusa(String key) {
- Byte clazz = dictionary.lookup(key);
- if (clazz == null) {
- return null;
- }
- char flags = dictionary.getFlags(clazz);
- if ((flags & WordEntry.BUSA) != 0 && (flags & WordEntry.NOUN) == 0) {
- return new WordEntry(key, flags, clazz);
- } else {
- return null;
- }
+ return getWord(key, WordEntry.BUSA, WordEntry.NOUN);
}
/** return list of irregular compounds for word class. */
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/HangulDictionary.java Sun Oct 27 18:50:54 2013
@@ -73,7 +73,7 @@ class HangulDictionary {
}
/** looks up word class for a word (exact match) */
- Byte lookup(String key) {
+ Byte lookup(CharSequence key) {
// TODO: why is does this thing lookup empty strings?
if (key.length() == 0) {
return null;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/EomiUtil.java Sun Oct 27 18:50:54 2013
@@ -125,7 +125,9 @@ class EomiUtil {
if(nChrs.length==2&&chrs[index]=='시'&&(chrs.length<=index+1||
(chrs.length>index+1&&chrs[index+1]!='셨'))) {
- if(DictionaryUtil.getWord(results[0])!=null) return results; //'ì'ê° í¬í¨ë ë¨ì´ê° ìë¤. ì±ê°ìë¤/ëìë¤/ë¤ì¤ìë¤
+ if (DictionaryUtil.hasWord(results[0])) {
+ return results; //'ì'ê° í¬í¨ë ë¨ì´ê° ìë¤. ì±ê°ìë¤/ëìë¤/ë¤ì¤ìë¤
+ }
pomi = chrs[index]+pomi;
setPomiResult(results,stem.substring(0,index),pomi);
if(--index==0||chrs[index]!='ì¼') return results; // ë¤ìì´ê±°ë ì ì´ë§ì´ë¯¸ê° ìë¤ë©´...
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java?rev=1536184&r1=1536183&r2=1536184&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordSpaceAnalyzer.java Sun Oct 27 18:50:54 2013
@@ -115,7 +115,7 @@ public class WordSpaceAnalyzer {
if(output.getLastEnd()<input.length()) {
String source = input.substring(output.getLastEnd());
- int score = DictionaryUtil.getWord(source)==null ? AnalysisOutput.SCORE_ANALYSIS : AnalysisOutput.SCORE_CORRECT;
+ int score = DictionaryUtil.hasWord(source) ? AnalysisOutput.SCORE_CORRECT : AnalysisOutput.SCORE_ANALYSIS;
AnalysisOutput o =new AnalysisOutput(source,null,null,PatternConstants.POS_NOUN,
PatternConstants.PTN_N,score);
@@ -325,7 +325,7 @@ public class WordSpaceAnalyzer {
) {
eend--;
}else if(pvword!=null&&candidates.get(0).getPatn()>=PatternConstants.PTN_VM&& // 명사 + 용언 어구 중에.. 용언어구로 단어를 이루는 경우는 없다.
- candidates.get(0).getPatn()<=PatternConstants.PTN_VMXMJ && DictionaryUtil.getWord(input)!=null){
+ candidates.get(0).getPatn()<=PatternConstants.PTN_VMXMJ && DictionaryUtil.hasWord(input)){
candidates.clear();
break;
}else if(pvword!=null&&VerbUtil.verbSuffix(candidates.get(0).getStem())
@@ -550,7 +550,7 @@ public class WordSpaceAnalyzer {
for (int i = es; i < str.length(); i++) {
if (SyllableFeatures.hasFeature(str.charAt(i), SyllableFeatures.JOSA1)) {
- return DictionaryUtil.getWord(str.substring(ws,i)) != null;
+ return DictionaryUtil.hasWord(str.substring(ws,i));
}
}