You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/03/28 08:56:37 UTC
svn commit: r1461969 -
/stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
Author: rwesten
Date: Thu Mar 28 07:56:37 2013
New Revision: 1461969
URL: http://svn.apache.org/r1461969
Log:
minor: improved logging for failed language detections. The first 200 characters of the processed text are now included in the exception message.
Modified:
stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
Modified: stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1461969&r1=1461968&r2=1461969&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java Thu Mar 28 07:56:37 2013
@@ -215,7 +215,8 @@ public class LanguageDetectionEnhancemen
} catch (IOException e) {
throw new InvalidContentException(this, ci, e);
}
- if (text.trim().length() == 0) {
+ //do not call trim() on long texts to check if the text is empty
+ if (text.length() < 50 && text.trim().length() == 0) {
log.info("No text contained in ContentPart {} of ContentItem {}",
contentPart.getKey(),ci.getUri());
return;
@@ -230,10 +231,14 @@ public class LanguageDetectionEnhancemen
try {
languages = languageIdentifier.getLanguages(text);
log.debug("language identified: {}",languages);
- }
- catch (LangDetectException e) {
- log.warn("Could not identify language", e);
- throw new EngineException(this, ci, "Could not identify language", e);
+ } catch (LangDetectException e) {
+ StringBuilder msg = new StringBuilder("Could not identify language of text: ");
+ if(text.length() < 200){
+ msg.append(text);
+ } else {
+ msg.append(text.subSequence(0, 199)).append("...");
+ }
+ throw new EngineException(this, ci, msg.toString(), e);
}
// add language to metadata