You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/03/06 16:21:08 UTC
svn commit: r1574913 - in /stanbol/trunk/enhancement-engines/langdetect: ./
src/main/java/org/apache/stanbol/enhancer/engines/langdetect/
src/test/java/org/apache/stanbol/enhancer/engines/langdetect/
Author: rwesten
Date: Thu Mar 6 15:21:08 2014
New Revision: 1574913
URL: http://svn.apache.org/r1574913
Log:
merged fix for STANBOL-1293 to trunk
Modified:
stanbol/trunk/enhancement-engines/langdetect/ (props changed)
stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java
Propchange: stanbol/trunk/enhancement-engines/langdetect/
------------------------------------------------------------------------------
Merged /stanbol/branches/release-0.12/enhancement-engines/langdetect:r1574911
Modified: stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1574913&r1=1574912&r2=1574913&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java Thu Mar 6 15:21:08 2014
@@ -232,13 +232,23 @@ public class LanguageDetectionEnhancemen
languages = languageIdentifier.getLanguages(text);
log.debug("language identified: {}",languages);
} catch (LangDetectException e) {
- StringBuilder msg = new StringBuilder("Could not identify language of text: ");
- if(text.length() < 200){
- msg.append(text);
+ Enum<?> errorCode = (Enum<?>)e.getCode();
+ //NOTE: https://code.google.com/p/language-detection/issues/detail?id=49
+ //ErrorCode enumeration is not visible. This engine wants to silently
+ //ignore " 0 - NoTextError" and "5 - CantDetectError"
+ if(errorCode.ordinal() != 0 && errorCode.ordinal() != 5) {
+ StringBuilder msg = new StringBuilder("Could not identify language of text: ");
+ if(text.length() < 200){
+ msg.append(text);
+ } else {
+ msg.append(text.subSequence(0, 199)).append("...");
+ }
+ msg.append(" (Error Code: ").append(errorCode.ordinal())
+ .append(" - ").append(errorCode.name()).append(")");
+ throw new EngineException(this, ci, msg.toString(), e);
} else {
- msg.append(text.subSequence(0, 199)).append("...");
+ log.debug("No text to detect the language from present in ContentItem ",ci);
}
- throw new EngineException(this, ci, msg.toString(), e);
}
// add language to metadata
Modified: stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java?rev=1574913&r1=1574912&r2=1574913&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java (original)
+++ stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java Thu Mar 6 15:21:08 2014
@@ -16,7 +16,7 @@
*/
package org.apache.stanbol.enhancer.engines.langdetect;
-import static junit.framework.Assert.assertEquals;
+import static org.junit.Assert.assertEquals;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
import static org.junit.Assert.assertEquals;
@@ -134,4 +134,14 @@ public class LanguageDetectionEngineTest
assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
}
+
+ @Test
+ public void testNonTextContent() throws EngineException, ConfigurationException, LangDetectException, IOException {
+ LanguageDetectionEnhancementEngine langIdEngine = new LanguageDetectionEnhancementEngine();
+ ComponentContext context = new MockComponentContext();
+ context.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langdetect");
+ langIdEngine.activate(context);
+ ContentItem ci = ciFactory.createContentItem(new StringSource("123"));
+ langIdEngine.computeEnhancements(ci);
+ }
}