You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/03/06 16:21:08 UTC

svn commit: r1574913 - in /stanbol/trunk/enhancement-engines/langdetect: ./ src/main/java/org/apache/stanbol/enhancer/engines/langdetect/ src/test/java/org/apache/stanbol/enhancer/engines/langdetect/

Author: rwesten
Date: Thu Mar  6 15:21:08 2014
New Revision: 1574913

URL: http://svn.apache.org/r1574913
Log:
merged fix for STANBOL-1293 to trunk

Modified:
    stanbol/trunk/enhancement-engines/langdetect/   (props changed)
    stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
    stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java

Propchange: stanbol/trunk/enhancement-engines/langdetect/
------------------------------------------------------------------------------
  Merged /stanbol/branches/release-0.12/enhancement-engines/langdetect:r1574911

Modified: stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1574913&r1=1574912&r2=1574913&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java Thu Mar  6 15:21:08 2014
@@ -232,13 +232,23 @@ public class LanguageDetectionEnhancemen
             languages = languageIdentifier.getLanguages(text);
             log.debug("language identified: {}",languages);
         } catch (LangDetectException e) {
-            StringBuilder msg = new StringBuilder("Could not identify language of text: ");
-            if(text.length() < 200){
-                msg.append(text);
+            Enum<?> errorCode = (Enum<?>)e.getCode();
+            //NOTE: https://code.google.com/p/language-detection/issues/detail?id=49
+            //ErrorCode enumeration is not visible. This engine wants to silently
+            //ignore " 0 - NoTextError" and "5 - CantDetectError"
+            if(errorCode.ordinal() != 0 && errorCode.ordinal() != 5) {
+                StringBuilder msg = new StringBuilder("Could not identify language of text: ");
+                if(text.length() < 200){
+                    msg.append(text);
+                } else {
+                    msg.append(text.subSequence(0, 199)).append("...");
+                }
+                msg.append(" (Error Code: ").append(errorCode.ordinal())
+                        .append(" - ").append(errorCode.name()).append(")");
+                throw new EngineException(this, ci, msg.toString(), e);
             } else {
-                msg.append(text.subSequence(0, 199)).append("...");
+                log.debug("No text to detect the language from present in ContentItem ",ci);
             }
-            throw new EngineException(this, ci, msg.toString(), e);
         }
         
         // add language to metadata

Modified: stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java?rev=1574913&r1=1574912&r2=1574913&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java (original)
+++ stanbol/trunk/enhancement-engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java Thu Mar  6 15:21:08 2014
@@ -16,7 +16,7 @@
  */
 package org.apache.stanbol.enhancer.engines.langdetect;
 
-import static junit.framework.Assert.assertEquals;
+import static org.junit.Assert.assertEquals;
 import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
 import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
 import static org.junit.Assert.assertEquals;
@@ -134,4 +134,14 @@ public class LanguageDetectionEngineTest
         assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
 
     }
+    
+    @Test
+    public void testNonTextContent() throws EngineException, ConfigurationException, LangDetectException, IOException {
+        LanguageDetectionEnhancementEngine langIdEngine = new LanguageDetectionEnhancementEngine();
+        ComponentContext context =  new MockComponentContext();
+        context.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langdetect");
+        langIdEngine.activate(context);
+        ContentItem ci = ciFactory.createContentItem(new StringSource("123"));
+        langIdEngine.computeEnhancements(ci);
+    }
 }