You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/17 14:04:37 UTC
svn commit: r1339560 - in
/incubator/stanbol/branches/celi-enhancement-engines: ./
engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/
engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/...
Author: rwesten
Date: Thu May 17 12:04:36 2012
New Revision: 1339560
URL: http://svn.apache.org/viewvc?rev=1339560&view=rev
Log:
merged revisions 1339554, 1339557 and 1339558 from trunk (STANBOL-613, STANBOL-617); STANBOL-583: Adapted CELI Language Identification Engine to STANBOL-613, moved logging of enhancement results (for unit tests) to its own test-utility class
Added:
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/testutils/TestUtils.java
Modified:
incubator/stanbol/branches/celi-enhancement-engines/ (props changed)
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/utils/Utils.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java
incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java
Propchange: incubator/stanbol/branches/celi-enhancement-engines/
------------------------------------------------------------------------------
svn:mergeinfo = /incubator/stanbol/trunk/enhancer:1339554,1339557-1339558
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java Thu May 17 12:04:36 2012
@@ -1,26 +1,22 @@
package org.apache.stanbol.enhancer.engines.celi.classification.impl;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.Dictionary;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.Vector;
-import java.util.Map.Entry;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NoConvertorException;
-import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.felix.scr.annotations.Activate;
@@ -37,9 +33,9 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java Thu May 17 12:04:36 2012
@@ -5,7 +5,6 @@ import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
-import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.List;
import java.util.Vector;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java Thu May 17 12:04:36 2012
@@ -1,7 +1,9 @@
package org.apache.stanbol.enhancer.engines.celi.langid.impl;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
import java.io.IOException;
import java.net.MalformedURLException;
@@ -10,8 +12,8 @@ import java.util.Collections;
import java.util.Dictionary;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.Map.Entry;
+import java.util.Set;
import javax.xml.soap.SOAPException;
@@ -35,9 +37,9 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
@@ -170,6 +172,7 @@ public class CeliLanguageIdentifierEnhan
UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(gl.getLang())));
g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(gl.getConfidence())));
+ g.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
} finally {
ci.getLock().writeLock().unlock();
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java Thu May 17 12:04:36 2012
@@ -4,7 +4,6 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
-import java.net.URI;
import java.net.URL;
import java.util.Collections;
import java.util.List;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java Thu May 17 12:04:36 2012
@@ -1,8 +1,6 @@
package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
import static org.apache.stanbol.enhancer.engines.celi.utils.Utils.getSelectionContext;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
@@ -12,12 +10,11 @@ import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.Dictionary;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.Vector;
-import java.util.Map.Entry;
import javax.xml.soap.SOAPException;
@@ -25,8 +22,6 @@ import org.apache.clerezza.rdf.core.Lang
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
@@ -44,9 +39,9 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java Thu May 17 12:04:36 2012
@@ -23,7 +23,6 @@ import javax.xml.soap.SOAPPart;
import javax.xml.transform.stream.StreamSource;
import org.apache.clerezza.rdf.core.impl.util.Base64;
-import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.slf4j.Logger;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java Thu May 17 12:04:36 2012
@@ -1,7 +1,11 @@
package org.apache.stanbol.enhancer.engines.celi.ner.impl;
import static org.apache.stanbol.enhancer.engines.celi.utils.Utils.getSelectionContext;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.*;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
import java.io.IOException;
import java.net.URL;
@@ -11,12 +15,10 @@ import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.Map.Entry;
-import java.util.Vector;
+import java.util.Set;
import javax.xml.soap.SOAPException;
@@ -26,7 +28,6 @@ import org.apache.clerezza.rdf.core.Lite
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NoConvertorException;
import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
@@ -38,16 +39,15 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
-import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java Thu May 17 12:04:36 2012
@@ -1,11 +1,9 @@
package org.apache.stanbol.enhancer.engines.celi.ner.impl;
import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
-import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
@@ -23,7 +21,6 @@ import javax.xml.soap.SOAPPart;
import javax.xml.transform.stream.StreamSource;
import org.apache.clerezza.rdf.core.impl.util.Base64;
-import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.slf4j.Logger;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/utils/Utils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/utils/Utils.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/utils/Utils.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/utils/Utils.java Thu May 17 12:04:36 2012
@@ -22,8 +22,6 @@ import java.net.URL;
import java.util.Map;
import java.util.Map.Entry;
-import org.apache.clerezza.rdf.core.impl.util.Base64;
-
public final class Utils {
private Utils(){}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java Thu May 17 12:04:36 2012
@@ -18,13 +18,12 @@ import org.apache.clerezza.rdf.core.UriR
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
-import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.TestUtils;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -75,6 +74,9 @@ public class CeliClassificationEnhanceme
//CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
classificationEngine.computeEnhancements(ci);
+
+ TestUtils.logEnhancements(ci);
+
int textAnnoNum = checkAllTextAnnotations(ci.getMetadata(), TEXT);
log.info(textAnnoNum + " TextAnnotations found ...");
int entityAnnoNum = checkAllEntityAnnotations(ci.getMetadata());
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java Thu May 17 12:04:36 2012
@@ -1,37 +1,21 @@
package org.apache.stanbol.enhancer.engines.celi.langid.impl;
import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertFalse;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
-import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEnhancement;
-import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateLanguageAnnotation;
-import static org.junit.Assert.assertTrue;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.List;
-
-import junit.framework.Assert;
import org.apache.clerezza.rdf.core.LiteralFactory;
-import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
-import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
-import org.apache.stanbol.enhancer.engines.celi.classification.impl.CeliClassificationEnhancementEngine;
-import org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine;
-import org.apache.stanbol.enhancer.engines.celi.ner.impl.CeliNamedEntityExtractionEnhancementEngine;
import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.TestUtils;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -39,7 +23,6 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
-import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -82,19 +65,20 @@ public class CeliLanguageIdentifierEnhan
ContentItem ci = wrapAsContentItem(TEXT);
try {
langIdentifier.computeEnhancements(ci);
- HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+
+ TestUtils.logEnhancements(ci);
+
+ HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
langIdentifier.getClass().getName()));
-
- PlainLiteral detectedLnaguage = validateLanguageAnnotation(ci.getMetadata(), TEXT,expectedValues);
+ int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);
+ assertEquals("A single TextAnnotation is expected by this Test", 1,numTextAnnotations);
//even through this tests do not validate service quality but rather
//the correct integration of the CELI service as EnhancementEngine
//we expect the "fr" is detected for the parsed text
assertEquals("The detected language for text '"+TEXT+"' MUST BE 'fr'",
- "fr",detectedLnaguage.getLexicalForm());
- assertEquals("The value oft the returned language is not the expected one",
- detectedLnaguage.getLexicalForm(),EnhancementEngineHelper.getLanguage(ci));
+ "fr",EnhancementEngineHelper.getLanguage(ci));
int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java Thu May 17 12:04:36 2012
@@ -1,6 +1,5 @@
package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
-import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.TURTLE;
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.MORPHOLOGICAL_ANALYSIS;
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.SERVICE_URL;
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.hasLemmaForm;
@@ -9,7 +8,6 @@ import static org.apache.stanbol.enhance
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
-import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEnhancement;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateTextAnnotation;
import static org.junit.Assert.assertEquals;
@@ -17,10 +15,8 @@ import static org.junit.Assert.assertFal
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.UnknownHostException;
-import java.nio.charset.Charset;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.Hashtable;
@@ -37,10 +33,10 @@ import org.apache.clerezza.rdf.core.Type
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
-import org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider;
import org.apache.clerezza.rdf.ontologies.XSD;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.TestUtils;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -102,7 +98,7 @@ public class CeliLemmatizerEnhancementEn
throw e;
}
- logEnhancements(ci);
+ TestUtils.logEnhancements(ci);
//validate enhancement
HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
@@ -142,7 +138,7 @@ public class CeliLemmatizerEnhancementEn
throw e;
}
- logEnhancements(ci);
+ TestUtils.logEnhancements(ci);
//validate enhancements
HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
@@ -170,13 +166,6 @@ public class CeliLemmatizerEnhancementEn
Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum); shutdownEngine(morphoAnalysisEngine);
}
- private void logEnhancements(ContentItem ci) {
- log.info("Enhancement Results of Lemmatizer Engine for Text: \n {} ",TEXT);
- JenaSerializerProvider serializer = new JenaSerializerProvider();
- ByteArrayOutputStream logOut = new ByteArrayOutputStream();
- serializer.serialize(logOut, ci.getMetadata(), TURTLE);
- log.info("Enhancements: \n{}",new String(logOut.toByteArray(),Charset.forName("UTF-8")));
- }
/**
* [1..*] values of an {@link PlainLiteral} in the same language as the
* analyzed text
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java Thu May 17 12:04:36 2012
@@ -16,9 +16,8 @@ import org.apache.clerezza.rdf.core.impl
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.celi.classification.impl.CeliClassificationEnhancementEngine;
-import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
-import org.apache.stanbol.enhancer.engines.celi.ner.impl.CeliNamedEntityExtractionEnhancementEngine;
import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.TestUtils;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -73,6 +72,9 @@ public class CeliNamedEntityExtractionEn
//CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
nerEngine.computeEnhancements(ci);
+
+ TestUtils.logEnhancements(ci);
+
HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/testutils/TestUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/testutils/TestUtils.java?rev=1339560&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/testutils/TestUtils.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/testutils/TestUtils.java Thu May 17 12:04:36 2012
@@ -0,0 +1,27 @@
+package org.apache.stanbol.enhancer.engines.celi.testutils;
+
+import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.TURTLE;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.charset.Charset;
+
+import org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public final class TestUtils {
+
+ private static final Logger log = LoggerFactory.getLogger(TestUtils.class);
+
+ private TestUtils(){}
+
+ public static void logEnhancements(ContentItem ci) {
+ JenaSerializerProvider serializer = new JenaSerializerProvider();
+ ByteArrayOutputStream logOut = new ByteArrayOutputStream();
+ serializer.serialize(logOut, ci.getMetadata(), TURTLE);
+ log.info("Enhancements: \n{}",new String(logOut.toByteArray(),Charset.forName("UTF-8")));
+ }
+
+
+}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java Thu May 17 12:04:36 2012
@@ -17,6 +17,8 @@
package org.apache.stanbol.enhancer.engines.langid;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
import java.io.IOException;
import java.util.Collections;
@@ -167,6 +169,7 @@ public class LangIdEnhancementEngine
try {
UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(language)));
+ g.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
} finally {
ci.getLock().writeLock().unlock();
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java Thu May 17 12:04:36 2012
@@ -18,7 +18,7 @@ package org.apache.stanbol.enhancer.engi
import static junit.framework.Assert.assertEquals;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
-import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateLanguageAnnotation;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -26,8 +26,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
+
import org.apache.clerezza.rdf.core.LiteralFactory;
-import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.commons.io.IOUtils;
@@ -84,7 +84,12 @@ public class LangIdEngineTest {
String language = tc.getLanguage();
assertEquals("en", language);
}
-
+ /**
+ * Test the engine and validates the created enhancements
+ * @throws EngineException
+ * @throws IOException
+ * @throws ConfigurationException
+ */
@Test
public void testEngine() throws EngineException, IOException, ConfigurationException {
LangIdEnhancementEngine langIdEngine = new LangIdEnhancementEngine();
@@ -97,14 +102,13 @@ public class LangIdEngineTest {
expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
langIdEngine.getClass().getName()));
- PlainLiteral detectedLnaguage = validateLanguageAnnotation(ci.getMetadata(), text,expectedValues);
+ int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), text, expectedValues);
+ assertEquals("A single TextAnnotation is expected", 1,textAnnotationCount);
//even through this tests do not validate service quality but rather
//the correct integration of the CELI service as EnhancementEngine
//we expect the "en" is detected for the parsed text
assertEquals("The detected language for text '"+text+"' MUST BE 'en'",
- "en",detectedLnaguage.getLexicalForm());
- assertEquals("The value oft the returned language is not the expected one",
- detectedLnaguage.getLexicalForm(),EnhancementEngineHelper.getLanguage(ci));
+ "en",EnhancementEngineHelper.getLanguage(ci));
int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/main/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.java Thu May 17 12:04:36 2012
@@ -16,7 +16,10 @@
*/
package org.apache.stanbol.enhancer.engines.zemanta.impl;
+import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement;
+import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTopicEnhancement;
import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getReferences;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.SKOS_CONCEPT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
@@ -73,6 +76,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.osgi.framework.BundleContext;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
@@ -232,6 +236,8 @@ public class ZemantaEnhancementEngine
protected void processCategories(MGraph results, MGraph enhancements, UriRef ciId) {
Iterator<Triple> categories = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Category.getUri());
+ //add the root Text annotation as soon as the first TopicAnnotation is added.
+ UriRef textAnnotation = null;
while (categories.hasNext()) {
NonLiteral category = categories.next().getSubject();
log.debug("process category " + category);
@@ -245,8 +251,16 @@ public class ZemantaEnhancementEngine
if (categorisationScheme != null && categorisationScheme.equals(ZemantaOntologyEnum.categorization_DMOZ.getUri())) {
String categoryTitle = EnhancementEngineHelper.getString(results, target, ZemantaOntologyEnum.title.getUri());
if (categoryTitle != null) {
- //now write the Stanbol Enhancer entity enhancement
- UriRef categoryEnhancement = EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId);
+ if(textAnnotation == null){
+ //this is the first category ... create the TextAnnotation used
+ //to link all fise:TopicAnnotations
+ textAnnotation = createTextEnhancement(enhancements, this, ciId);
+ enhancements.add(new TripleImpl(textAnnotation,DC_TYPE,SKOS_CONCEPT));
+ }
+ //now write the TopicAnnotation
+ UriRef categoryEnhancement = createTopicEnhancement(enhancements, this, ciId);
+ //make related to the EntityAnnotation
+ enhancements.add(new TripleImpl(categoryEnhancement, DC_RELATION, textAnnotation));
//write the title
enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(categoryTitle)));
//write the reference
@@ -256,14 +270,16 @@ public class ZemantaEnhancementEngine
}
//write the confidence
if (confidence != null) {
- enhancements.add(
- new TripleImpl(categoryEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
+ enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_CONFIDENCE,
+ literalFactory.createTypedLiteral(confidence)));
}
- //we need to write the entity type and the dc:type
+ //we need to write the fise:entity-type
+ //as of STANBOL-617 we use now both the zemanta:Category AND the skos:Concept
+ //type. dc:type is no longer used as this is only used by fise:TextAnnotations
// see http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories
// for more Information
- enhancements.add(new TripleImpl(categoryEnhancement, DC_TYPE, ENHANCER_CATEGORY));
- //Use the Zemanta Category as type for the referred Entity
+ enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_TYPE, SKOS_CONCEPT));
+ //Use also Zemanta Category as type for the referred Entity
enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_TYPE, ZemantaOntologyEnum.Category.getUri()));
} else {
log.warn("Unable to process category " + category + " because no title is present");
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/zemanta/src/test/java/org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngineTest.java Thu May 17 12:04:36 2012
@@ -120,6 +120,8 @@ public class ZemantaEnhancementEngineTes
log.info(textAnnoNum + " TextAnnotations found ...");
int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(),expectedValues);
log.info(entityAnnoNum + " EntityAnnotations found ...");
+ int topicAnnoNum = EnhancementStructureHelper.validateAllTopicAnnotations(ci.getMetadata(),expectedValues);
+ log.info(entityAnnoNum + " TopicAnnotations found ...");
}
public static void main(String[] args) throws Exception{
Modified: incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Thu May 17 12:04:36 2012
@@ -131,6 +131,39 @@ public class EnhancementEngineHelper {
return enhancement;
}
/**
+ * Create a new instance with the types enhancer:Enhancement and
+ * enhancer:TopicAnnotation in the parsed graph along with default properties
+ * (dc:creator, dc:created and enhancer:extracted-from) and return
+ * the UriRef of the extraction so that engines can further add.
+ *
+ * @param metadata the graph
+ * @param engine the engine
+ * @param contentItemId the id
+ *
+ * @return the URI of the new enhancement instance
+ */
+ public static UriRef createTopicEnhancement(MGraph metadata,
+ EnhancementEngine engine, UriRef contentItemId){
+ UriRef enhancement = createEnhancement(metadata, engine, contentItemId);
+ metadata.add(new TripleImpl(enhancement, Properties.RDF_TYPE,
+ TechnicalClasses.ENHANCER_TOPICANNOTATION));
+ return enhancement;
+ }
+ /**
+ * Create a new instance with the types enhancer:Enhancement and
+ * enhancer:TopicAnnotation in the metadata-graph of the content
+ * item along with default properties (dc:creator and dc:created) and return
+ * the UriRef of the extraction so that engines can further add
+ *
+ * @param ci the ContentItem being under analysis
+ * @param engine the Engine performing the analysis
+ * @return the URI of the new enhancement instance
+ */
+ public static UriRef createTopicEnhancement(ContentItem ci,
+ EnhancementEngine engine){
+ return createTopicEnhancement(ci.getMetadata(), engine, new UriRef(ci.getUri().getUnicodeString()));
+ }
+ /**
* Create a new enhancement instance in the metadata-graph of the content
* item along with default properties (dc:creator and dc:created) and return
* the UriRef of the extraction so that engines can further add.
Modified: incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java Thu May 17 12:04:36 2012
@@ -24,8 +24,9 @@ import org.apache.clerezza.rdf.core.UriR
*
* @author ogrisel
*/
-public class TechnicalClasses {
-
+public final class TechnicalClasses {
+
+ private TechnicalClasses() {}
/**
* Type used for all enhancement created by Stanbol Enhancer
*/
@@ -87,11 +88,22 @@ public class TechnicalClasses {
* Used to indicate, that an EntityAnnotation describes an Categorisation.
* see <a href="http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories">
* Mapping of Categories</a> for more Information)
+ * @deprecated the preferred rdf:type for categories and topics is
+ * {@link OntologicalClasses#SKOS_CONCEPT} (see
+ * <a href="https://issues.apache.org/jira/browse/STANBOL-617">STANBOL-617</a>)
*/
public static final UriRef ENHANCER_CATEGORY = new UriRef(
NamespaceEnum.fise + "Category");
- private TechnicalClasses() {
- }
+ /**
+ * DC terms Linguistic System is the type used as Range for the dc:language
+ * property. As this property is also used for describing the language
+ * as identified for analysed content this type is used as dc:type for
+ * {@value #ENHANCER_TEXTANNOTATION} describing the language of the text
+ * (see
+ * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a>)
+ */
+ public static final UriRef DCTERMS_LINGUISTIC_SYSTEM = new UriRef(
+ NamespaceEnum.dc + "LinguisticSystem");
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java?rev=1339560&r1=1339559&r2=1339560&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java Thu May 17 12:04:36 2012
@@ -1,6 +1,11 @@
package org.apache.stanbol.enhancer.test.helper;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_ORGANISATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PERSON;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PLACE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
@@ -8,8 +13,11 @@ import static org.apache.stanbol.enhance
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENHANCEMENT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TOPICANNOTATION;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
@@ -37,6 +45,7 @@ import org.apache.clerezza.rdf.ontologie
import org.apache.clerezza.rdf.ontologies.XSD;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
@@ -83,28 +92,39 @@ public class EnhancementStructureHelper
* @param expectedValues expected values (properties for the values are used as keys)
*/
public static void validateTextAnnotation(TripleCollection enhancements, UriRef textAnnotation, String content, Map<UriRef,Resource> expectedValues) {
+ //validate the rdf:type
+ Iterator<Triple> rdfTypeIterator = enhancements.filter(textAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION);
+ assertTrue("Parsed Enhancement "+textAnnotation +" is missing the fise:TextAnnotation type ",
+ rdfTypeIterator.hasNext());
Iterator<Triple> selectedTextIterator = enhancements.filter(textAnnotation,
ENHANCER_SELECTED_TEXT, null);
- // check if the selected text is added
- assertTrue("TextAnnotations MUST have a fise:selected-text value (uri: "+textAnnotation+")",
- selectedTextIterator.hasNext());
- // test if the selected text is part of the TEXT_TO_TEST
- Resource selectedTextResource = selectedTextIterator.next().getObject();
- assertTrue("fise:selected-text MUST BE of type PlainLiteral (uri: "+textAnnotation+")",
- selectedTextResource instanceof PlainLiteral);
- Literal selectedText = (Literal)selectedTextResource;
- assertTrue("The parsed content MUST contain the fise:selected-text value '"
- +selectedText.getLexicalForm()+"' (uri: "+textAnnotation+")!",content.contains(selectedText.getLexicalForm()));
+ // check if the selected text is added (or not)
+ Resource selectedTextResource;
+ if(selectedTextIterator.hasNext()){
+ // test if the selected text is part of the TEXT_TO_TEST
+ selectedTextResource = selectedTextIterator.next().getObject();
+ assertTrue("fise:selected-text MUST BE of type PlainLiteral (uri: "+textAnnotation+")",
+ selectedTextResource instanceof PlainLiteral);
+ Literal selectedText = (Literal)selectedTextResource;
+ assertTrue("The parsed content MUST contain the fise:selected-text value '"
+ +selectedText.getLexicalForm()+"' (uri: "+textAnnotation+")!",content.contains(selectedText.getLexicalForm()));
+ } else {
+ selectedTextResource = null; //no selected text
+ }
+ //check against an expected value
Resource expectedSelectedText = expectedValues.get(ENHANCER_SELECTED_TEXT);
if(expectedSelectedText != null){
assertEquals("The fise:selected-text is not the expected value "+expectedSelectedText+" (uri: "+textAnnotation+")!",
- expectedSelectedText, selectedText);
+ expectedSelectedText, selectedTextResource);
}
Resource selectionContextResource;
// test if context is added
Iterator<Triple> selectionContextIterator = enhancements.filter(textAnnotation,
ENHANCER_SELECTION_CONTEXT, null);
if(selectionContextIterator.hasNext()) { //context is optional
+ //selection context is not allowed without selected-text
+ assertNotNull("If fise:selection-context is present also fise:selected-text MUST BE present (uri: "+textAnnotation+")",
+ selectedTextResource);
// test if the selected text is part of the TEXT_TO_TEST
selectionContextResource = selectionContextIterator.next().getObject();
assertTrue("The fise:selection-context MUST BE of type PlainLiteral (uri: "+textAnnotation+")",
@@ -115,8 +135,9 @@ public class EnhancementStructureHelper
//check that the context contains the selected text
assertTrue("The fise:selected-text value MUST BE containted within the fise:selection-context value",
((Literal)selectionContextResource).getLexicalForm().contains(
- selectedText.getLexicalForm()));
+ ((Literal)selectedTextResource).getLexicalForm()));
} else {
+ assertNull("If no fise:selection-context is present also fise:selected-text MUST BE NOT present!", selectedTextResource);
selectionContextResource = null;
}
Resource expectedSelectionContext = expectedValues.get(ENHANCER_SELECTION_CONTEXT);
@@ -133,8 +154,11 @@ public class EnhancementStructureHelper
TypedLiteral startPosLiteral;
TypedLiteral endPosLiteral;
if(startPosIterator.hasNext()){
- assertNotNull("If fise:start is present the fise:selection-context MUST also be present (uri: "+textAnnotation+")!",
- selectionContextResource);
+ //NOTE: TextAnnotations might be used to select whole sections of a text
+ // (e.g. see STANBOL-617) in those cases adding the text of the
+ // whole section is not feasible.
+ //assertNotNull("If fise:start is present the fise:selection-context MUST also be present (uri: "+textAnnotation+")!",
+ // selectionContextResource);
Resource resource = startPosIterator.next().getObject();
//only a single start position is supported
assertFalse("fise:start MUST HAVE only a single value (uri: "+textAnnotation+")!",startPosIterator.hasNext());
@@ -159,10 +183,13 @@ public class EnhancementStructureHelper
endPosLiteral = null;
//check for equality of the selected text and the text on the selected position in the content
//System.out.println("TA ["+start+"|"+end+"]"+selectedText.getLexicalForm()+"<->"+content.substring(start,end));
- assertEquals("the substring [fise:start,fise:end] does not correspond to "
- + "the fise:selected-text value '"+selectedText.getLexicalForm()
- + "' of this TextAnnotation!",content.substring(start, end), selectedText.getLexicalForm());
+ if(selectedTextResource != null){
+ assertEquals("the substring [fise:start,fise:end] does not correspond to "
+ + "the fise:selected-text value '"+((Literal)selectedTextResource).getLexicalForm()
+ + "' of this TextAnnotation!",content.substring(start, end), ((Literal)selectedTextResource).getLexicalForm());
+ } // else no selected-text present ... unable to test this
} else {
+ assertNull("if fise:selected-text is present also fise:start AND fise:end MUST BE present!",selectedTextResource);
assertNull("If fise:selection-context is present also fise:start AND fise:end MUST BE present!",selectionContextResource);
assertFalse("if fise:end is presnet also fise:start AND fise:selection-context MUST BE present!",endPosIterator.hasNext());
startPosLiteral = null;
@@ -181,8 +208,76 @@ public class EnhancementStructureHelper
//validate fise:Enhancement specific rules
validateEnhancement(enhancements, textAnnotation, expectedValues);
+
+ //validate for special TextAnnotations
+ validateLanguageAnnotations(enhancements,textAnnotation);
+ validateNERAnnotations(enhancements,textAnnotation, selectedTextResource);
+ }
+    /**
+     * Validates fise:TextAnnotations that annotate the language of the text
+     * as defined by
+     * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a>.<p>
+     * If the parsed TextAnnotation has a dc:language value, then this value
+     * MUST BE a single {@link PlainLiteral} with at least two chars, the one
+     * and only dc:type value MUST BE dc:LinguisticSystem and the annotation
+     * MUST BE contained in the list returned by
+     * <code>EnhancementEngineHelper.getLanguageAnnotations(..)</code>.
+     * If no dc:language value is present, none of the dc:type values is
+     * allowed to be dc:LinguisticSystem.<p>
+     * Called by {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}
+     * @param enhancements the enhancements graph
+     * @param textAnnotation the fise:TextAnnotation to validate
+     */
+    private static void validateLanguageAnnotations(TripleCollection enhancements, UriRef textAnnotation) {
+        Iterator<Triple> dcLanguageIterator = enhancements.filter(textAnnotation, DC_LANGUAGE, null);
+        if(dcLanguageIterator.hasNext()){ //a language annotation
+            Resource dcLanguageResource = dcLanguageIterator.next().getObject();
+            assertTrue("The dc:language value MUST BE a PlainLiteral", dcLanguageResource instanceof PlainLiteral);
+            assertTrue("The dc:language value '"+dcLanguageResource+"' MUST BE at least two chars long",
+                ((Literal)dcLanguageResource).getLexicalForm().length() >=2);
+            assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:language value (uri "
+                +textAnnotation+")",dcLanguageIterator.hasNext());
+
+            //language annotations need to use exactly one dc:type: dc:LinguisticSystem
+            Iterator<Triple> dcTypeIterator = enhancements.filter(textAnnotation, DC_TYPE, null);
+            assertTrue("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri "
+                +textAnnotation+")", dcTypeIterator.hasNext());
+            assertEquals("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri "
+                +textAnnotation+")", DCTERMS_LINGUISTIC_SYSTEM,dcTypeIterator.next().getObject());
+            assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:type value (uri "
+                +textAnnotation+")",dcTypeIterator.hasNext());
+            //assert that the created TextAnnotation is correctly returned by the
+            //EnhancementEngineHelper methods
+            List<NonLiteral> languageAnnotation = EnhancementEngineHelper.getLanguageAnnotations(enhancements);
+            assertTrue("Language annotation "+textAnnotation+" was not returned by "
+                +"EnhancementEngineHelper.getLanguageAnnotations(..)!",languageAnnotation.contains(textAnnotation));
+        } else { //no language annotation
+            //dc:LinguisticSystem is reserved for annotations that provide a dc:language value
+            Iterator<Triple> dcTypeIterator = enhancements.filter(textAnnotation, DC_TYPE, null);
+            while(dcTypeIterator.hasNext()){
+                assertFalse("fise:TextAnnotations without a dc:language value MUST NOT use the "
+                    + "dc:type value dc:LinguisticSystem (uri "+textAnnotation+")",
+                    DCTERMS_LINGUISTIC_SYSTEM.equals(dcTypeIterator.next().getObject()));
+            }
+        }
+
+    }
+    /**
+     * Validates that fise:TextAnnotations with the dc:type dbp-ont:Person,
+     * dbp-ont:Organisation or dbp-ont:Place do have a fise:selected-text
+     * value. TextAnnotations that use none of those dc:type values are not
+     * checked by this method.<p>
+     * Called by {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}
+     * @param enhancements the enhancements graph
+     * @param textAnnotation the fise:TextAnnotation to validate
+     * @param selectedTextResource the fise:selected-text value or
+     * <code>null</code> if the TextAnnotation does not define one
+     */
+    private static void validateNERAnnotations(TripleCollection enhancements, UriRef textAnnotation, Resource selectedTextResource) {
+        Iterator<Triple> dcTypeIterator = enhancements.filter(textAnnotation, DC_TYPE, null);
+        boolean isNERAnnotation = false;
+        //stop at the first dc:type value that marks a named entity
+        while(dcTypeIterator.hasNext() && !isNERAnnotation){
+            Resource dcTypeValue = dcTypeIterator.next().getObject();
+            isNERAnnotation = DBPEDIA_PERSON.equals(dcTypeValue) ||
+                    DBPEDIA_ORGANISATION.equals(dcTypeValue) ||
+                    DBPEDIA_PLACE.equals(dcTypeValue);
+        }
+        if(isNERAnnotation){
+            assertNotNull("fise:TextAnnotations with a dc:type of dbp-ont:Person, "
+                +"dbp-ont:Organisation or dbp-ont:Place MUST have a fise:selected-text value (uri "
+                +textAnnotation+")", selectedTextResource);
+        }
     }
-
/**
* Validates all fise:EntityAnnotations contained by the parsed enhancements
* graph.
@@ -221,11 +316,7 @@ public class EnhancementStructureHelper
Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(
entityAnnotation, DC_RELATION, null);
// check if the relation to the text annotation is set
- //TODO: currently it is not required that all EntityAnnotations are linked to
- // an TextAnnotation, because EntityAnnotations are also used for
- // Topics (that do not explicitly occur in texts.
- // This might change as soon there is an own Topic type!
- //assertTrue(relationToTextAnnotationIterator.hasNext());
+ assertTrue(relationToTextAnnotationIterator.hasNext());
while (relationToTextAnnotationIterator.hasNext()) {
// test if the referred annotations are text annotations
UriRef referredTextAnnotation = (UriRef) relationToTextAnnotationIterator.next().getObject();
@@ -289,6 +380,10 @@ public class EnhancementStructureHelper
* @param expectedValues expected values (properties for the values are used as keys)
*/
public static void validateEnhancement(TripleCollection enhancements, UriRef enhancement, Map<UriRef,Resource> expectedValues){
+ //validate the rdf:type
+ Iterator<Triple> rdfTypeIterator = enhancements.filter(enhancement, RDF_TYPE, ENHANCER_ENHANCEMENT);
+ assertTrue("Parsed Enhancement "+enhancement +" is missing the fise:Enhancement type ",
+ rdfTypeIterator.hasNext());
//validate the creator
Iterator<Triple> creatorIterator = enhancements.filter(enhancement, Properties.DC_CREATOR, null);
assertTrue("Enhancements MUST HAVE a creator",creatorIterator.hasNext());
@@ -388,30 +483,107 @@ public class EnhancementStructureHelper
assertFalse("Only a single dc:type value is allowed!", dcTypeIterator.hasNext());
}
}
- public static PlainLiteral validateLanguageAnnotation(MGraph g, String content,HashMap<UriRef,Resource> expectedValues) {
- Iterator<Triple> textAnnotationIterator = g.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
- // test if a textAnnotation is present
- assertTrue("The Language Annotation is missing!",textAnnotationIterator.hasNext());
- NonLiteral annotation = textAnnotationIterator.next().getSubject();
- assertTrue("TextAnnotations MUST BE URIs", annotation instanceof UriRef);
- assertFalse("Only a single Language Annotation is expected!", textAnnotationIterator.hasNext());
- //validate enhancement metadata (this also checks the confidence)
- validateEnhancement(g, (UriRef)annotation, expectedValues);
- //validate the dc:language value
- Iterator<Triple> languageIterator = g.filter(annotation, Properties.DC_LANGUAGE, null);
- assertTrue("The fise:TextAnnotation for the language MUST HAVE a value for dc:language!",languageIterator.hasNext());
- Resource languageResource = languageIterator.next().getObject();
- assertFalse("Only a single dc:langauge value MUST BE present!", languageIterator.hasNext());
- assertTrue("The dc:langauge value MUST BE a plain literal",languageResource instanceof PlainLiteral);
- assertTrue("The dc:language value MIST BE at least two chars long",
- ((PlainLiteral)languageResource).getLexicalForm().length()>=2);
- //assert that the created TextAnnotation is correctly returned by the
- //EnhancementEngineHelper methods
- List<NonLiteral> languageAnnotation = EnhancementEngineHelper.getLanguageAnnotations(g);
- assertFalse("No langauge Annotation was extracted by the EnhancementEngineHelper#getLanguageAnnotations(..) method",
- languageAnnotation.isEmpty());
- assertEquals("The returned language annotation was not the one created by this engine",
- annotation, languageAnnotation.get(0));
- return (PlainLiteral)languageResource;
+    /**
+     * Validates all fise:TopicAnnotations contained by the parsed enhancements
+     * graph.
+     * @param enhancements the enhancement graph
+     * @param expectedValues the expected values of all validated TopicAnnotations.
+     * Properties are used as keys. Typical example would be fise:extracted-from
+     * with the id of the ContentItem as value; dc-terms:creator with the
+     * {@link Class#getName()} as value. <code>null</code> is treated like an
+     * empty map.
+     * @return the number of found and validated TopicAnnotations.
+     */
+    public static int validateAllTopicAnnotations(TripleCollection enhancements,Map<UriRef,Resource> expectedValues) {
+        //typed empty map instead of the raw Collections.EMPTY_MAP (no unchecked warning)
+        Map<UriRef,Resource> expected = expectedValues == null ?
+                Collections.<UriRef,Resource>emptyMap() : expectedValues;
+        Iterator<Triple> topicAnnotationIterator = enhancements.filter(null,
+                RDF_TYPE, ENHANCER_TOPICANNOTATION);
+        int topicAnnotationCount = 0;
+        while (topicAnnotationIterator.hasNext()) {
+            NonLiteral topicAnnotation = topicAnnotationIterator.next().getSubject();
+            //fail with an assertion (and not a ClassCastException) for non URI subjects
+            assertTrue("TopicAnnotations MUST BE URIs (found: "+topicAnnotation+")",
+                topicAnnotation instanceof UriRef);
+            validateTopicAnnotation(enhancements, (UriRef)topicAnnotation, expected);
+            topicAnnotationCount++;
+        }
+        return topicAnnotationCount;
     }
+
+    /**
+     * Checks if a fise:TopicAnnotation is valid as defined by
+     * <a href="https://issues.apache.org/jira/browse/STANBOL-617">STANBOL-617</a>.
+     * The following rules are checked:<ul>
+     * <li> the rdf:type fise:TopicAnnotation is present
+     * <li> at least one dc:relation is present and all dc:relation values
+     * are URIs of fise:TextAnnotations
+     * <li> all fise:entity-reference values (optional) are URIs
+     * <li> at least one fise:entity-label is present and all values are
+     * PlainLiterals
+     * <li> all fise:entity-type values (optional) are URIs
+     * <li> expected values for fise:entity-reference, fise:entity-label and
+     * fise:entity-type need to be present if they are parsed
+     * </ul>
+     * NOTE that this also validates all fise:Enhancement related requirements by
+     * calling {@link #validateEnhancement(TripleCollection, UriRef, Map)}
+     * @param enhancements the enhancements graph
+     * @param topicAnnotation the topic annotation to validate
+     * @param expectedValues expected values (properties for the values are used
+     * as keys). <code>null</code> is treated like an empty map.
+     */
+    public static void validateTopicAnnotation(TripleCollection enhancements, UriRef topicAnnotation, Map<UriRef,Resource> expectedValues){
+        Map<UriRef,Resource> expected = expectedValues == null ?
+                Collections.<UriRef,Resource>emptyMap() : expectedValues;
+        //validate the rdf:type
+        Iterator<Triple> rdfTypeIterator = enhancements.filter(topicAnnotation, RDF_TYPE, ENHANCER_TOPICANNOTATION);
+        assertTrue("Parsed Enhancement "+topicAnnotation +" is missing the fise:TopicAnnotation type ",
+            rdfTypeIterator.hasNext());
+
+        //TopicAnnotations need to be linked to TextAnnotations describing the
+        //section of the text that has a specific Topic.
+        //If the topic is for the whole text the TextAnnotation will have no
+        //selected-text value
+        Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(
+            topicAnnotation, DC_RELATION, null);
+        // check if the relation to the text annotation is set
+        assertTrue("TopicAnnotation "+topicAnnotation+" MUST HAVE a dc:relation to a fise:TextAnnotation!",
+            relationToTextAnnotationIterator.hasNext());
+        while (relationToTextAnnotationIterator.hasNext()) {
+            // test if the referred annotations are text annotations
+            Resource referredTextAnnotation = relationToTextAnnotationIterator.next().getObject();
+            //check the type first so that a literal/bnode value fails with an
+            //assertion instead of a ClassCastException
+            assertTrue("dc:relation values of TopicAnnotation "+topicAnnotation+" MUST BE URIs (found: "
+                +referredTextAnnotation+")!", referredTextAnnotation instanceof UriRef);
+            assertTrue("The dc:relation value "+referredTextAnnotation+" of TopicAnnotation "
+                +topicAnnotation+" is not a fise:TextAnnotation!",
+                enhancements.filter((UriRef)referredTextAnnotation, RDF_TYPE,
+                    ENHANCER_TEXTANNOTATION).hasNext());
+        }
+
+        // test if an entity (the topic) is referred (NOTE: in contrast to
+        // fise:EntityAnnotations this property is NOT required - cardinality [0..*]
+        Iterator<Triple> entityReferenceIterator = enhancements.filter(topicAnnotation,
+            ENHANCER_ENTITY_REFERENCE, null);
+        Resource expectedReferencedEntity = expected.get(ENHANCER_ENTITY_REFERENCE);
+        while(entityReferenceIterator.hasNext()){ //check possible multiple references
+            Resource entityReferenceResource = entityReferenceIterator.next().getObject();
+            // test if the reference is an URI
+            assertTrue("fise:entity-reference value MUST BE of URIs",entityReferenceResource instanceof UriRef);
+            if(expectedReferencedEntity != null && expectedReferencedEntity.equals(entityReferenceResource)){
+                expectedReferencedEntity = null; //found
+            }
+        }
+        assertNull("TopicAnnotation "+topicAnnotation+" fise:entity-reference has not the expected value "
+            +expectedReferencedEntity+"!", expectedReferencedEntity);
+
+        //test if the entity label is set (required)
+        Iterator<Triple> entityLabelIterator = enhancements.filter(topicAnnotation, ENHANCER_ENTITY_LABEL, null);
+        assertTrue("TopicAnnotation "+topicAnnotation+" MUST HAVE a fise:entity-label value!",
+            entityLabelIterator.hasNext());
+        Resource expectedEntityLabel = expected.get(ENHANCER_ENTITY_LABEL);
+        while(entityLabelIterator.hasNext()){
+            Resource entityLabelResource = entityLabelIterator.next().getObject();
+            assertTrue("fise:entity-label values MUST BE PlainLiterals (TopicAnnotation: "+topicAnnotation+")!",
+                entityLabelResource instanceof PlainLiteral);
+            if(expectedEntityLabel != null && expectedEntityLabel.equals(entityLabelResource)){
+                expectedEntityLabel = null; //found
+            }
+        }
+        assertNull("The expected EntityLabel "+expectedEntityLabel+" was not found",
+            expectedEntityLabel);
+
+        // test fise:entity-type(s). NOTE: this is not required - cardinality [0..*]
+        Iterator<Triple> entityTypeIterator = enhancements.filter(topicAnnotation, Properties.ENHANCER_ENTITY_TYPE, null);
+        Resource expectedEntityType = expected.get(Properties.ENHANCER_ENTITY_TYPE);
+        //iterate over ALL values: the expected type might not be the first one
+        while(entityTypeIterator.hasNext()){
+            Resource entityTypeResource = entityTypeIterator.next().getObject();
+            assertTrue("fise:entity-type values MUST BE URIs",entityTypeResource instanceof UriRef);
+            if(expectedEntityType != null && expectedEntityType.equals(entityTypeResource)){
+                expectedEntityType = null; //found
+            }
+        }
+        assertNull("The expected fise:entity-type value "+expectedEntityType+" was not found!", expectedEntityType);
+
+        //test all properties required by fise:Enhancement
+        validateEnhancement(enhancements, topicAnnotation, expected);
+    }
+
}