You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/03/28 08:56:37 UTC

svn commit: r1461969 - /stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java

Author: rwesten
Date: Thu Mar 28 07:56:37 2013
New Revision: 1461969

URL: http://svn.apache.org/r1461969
Log:
Minor: improved logging for failed language detections. The first 200 characters of the processed text are now included in the exception message.

Modified:
    stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java

Modified: stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1461969&r1=1461968&r2=1461969&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java Thu Mar 28 07:56:37 2013
@@ -215,7 +215,8 @@ public class LanguageDetectionEnhancemen
         } catch (IOException e) {
             throw new InvalidContentException(this, ci, e);
         }
-        if (text.trim().length() == 0) {
+        //do not call trim() on long texts to check if the text is empty
+        if (text.length() < 50  && text.trim().length() == 0) {
             log.info("No text contained in ContentPart {} of ContentItem {}",
                 contentPart.getKey(),ci.getUri());
             return;
@@ -230,10 +231,14 @@ public class LanguageDetectionEnhancemen
         try {
             languages = languageIdentifier.getLanguages(text);
             log.debug("language identified: {}",languages);
-        }
-        catch (LangDetectException e) {
-            log.warn("Could not identify language", e);
-            throw new EngineException(this, ci, "Could not identify language", e);
+        } catch (LangDetectException e) {
+            StringBuilder msg = new StringBuilder("Could not identify language of text: ");
+            if(text.length() < 200){
+                msg.append(text);
+            } else {
+                msg.append(text.subSequence(0, 199)).append("...");
+            }
+            throw new EngineException(this, ci, msg.toString(), e);
         }
         
         // add language to metadata