You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/03/04 10:07:29 UTC

svn commit: r1296772 - in /incubator/stanbol/trunk/enhancer/engines/tika: pom.xml src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java src/test/resources/log4j.properties

Author: rwesten
Date: Sun Mar  4 09:07:29 2012
New Revision: 1296772

URL: http://svn.apache.org/viewvc?rev=1296772&view=rev
Log:
STANBOL-512: Unit tests no longer depend on the time zone of the host. Tests now use log4j for logging, which makes it possible to log debug-level messages of the Tika Engine.


Added:
    incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/log4j.properties   (with props)
Modified:
    incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
    incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java

Modified: incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/pom.xml?rev=1296772&r1=1296771&r2=1296772&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/pom.xml Sun Mar  4 09:07:29 2012
@@ -141,6 +141,16 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>  <!-- used for debug level logging during tests -->
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
 </project>

Modified: incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java?rev=1296772&r1=1296771&r2=1296772&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java Sun Mar  4 09:07:29 2012
@@ -35,14 +35,20 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.Charset;
+import java.text.DateFormatSymbols;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.Arrays;
+import java.util.Date;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.Locale;
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.regex.Pattern;
 
 import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.PlainLiteral;
 import org.apache.clerezza.rdf.core.Resource;
@@ -50,6 +56,7 @@ import org.apache.clerezza.rdf.core.Trip
 import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.impl.util.W3CDateFormat;
 import org.apache.clerezza.rdf.ontologies.DC;
 import org.apache.clerezza.rdf.ontologies.RDF;
 import org.apache.clerezza.rdf.ontologies.XSD;
@@ -69,12 +76,21 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TikaEngineTest {
 
+    private static final Logger log = LoggerFactory.getLogger(TikaEngineTest.class);
     private static TikaEngine engine;
     private static MockComponentContext context;
-    
+    private static LiteralFactory lf = LiteralFactory.getInstance();
+    /**
+     * Required to make this test independent of the time zone of the local host.
+     */
+    private static SimpleDateFormat dateDefaultTimezone =
+            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", new DateFormatSymbols(Locale.US));
+
     @BeforeClass
     public static void setUpServices() throws IOException {
         context = new MockComponentContext();
@@ -91,6 +107,7 @@ public class TikaEngineTest {
 
     @Test
     public void testHtml() throws EngineException, IOException {
+        log.info(">>> testHtml <<<");
         ContentItem ci = createContentItem("test.html", "text/html; charset=UTF-8");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -119,6 +136,7 @@ public class TikaEngineTest {
     }
     @Test
     public void testPdf() throws EngineException, IOException {
+        log.info(">>> testPdf <<<");
         //PDF created by Apple Pages
         ContentItem ci = createContentItem("test.pdf", "application/pdf");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
@@ -173,10 +191,10 @@ public class TikaEngineTest {
             "The Apache Stanbol Enhancer",
             "The Stanbol enhancer can detect famous cities",
             "</body></html>");
-
     }
     @Test
     public void testMsWord() throws EngineException, IOException {
+        log.info(">>> testMsWord <<<");
         ContentItem ci = createContentItem("test.doc", "application/msword");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -204,6 +222,7 @@ public class TikaEngineTest {
             "</body></html>");    }
     @Test
     public void testRtf() throws EngineException, IOException {
+        log.info(">>> testRtf <<<");
         ContentItem ci = createContentItem("test.rtf", "application/rtf");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -232,6 +251,7 @@ public class TikaEngineTest {
     }
     @Test
     public void testOdt() throws EngineException, IOException {
+        log.info(">>> testOdt <<<");
         ContentItem ci = createContentItem("test.odt", "application/vnd.oasis.opendocument.text");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -259,7 +279,8 @@ public class TikaEngineTest {
             "</body></html>");
     }
     @Test
-    public void testEMail() throws EngineException, IOException {
+    public void testEMail() throws EngineException, IOException, ParseException {
+        log.info(">>> testEMail <<<");
         ContentItem ci = createContentItem("test.email.txt", "message/rfc822");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -312,7 +333,8 @@ public class TikaEngineTest {
         
     }
     @Test
-    public void testMp3() throws EngineException, IOException {
+    public void testMp3() throws EngineException, IOException, ParseException {
+        log.info(">>> testMp3 <<<");
         ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -344,7 +366,8 @@ public class TikaEngineTest {
         verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "MP3");
     }
     @Test
-    public void testGEOMetadata() throws EngineException, IOException{
+    public void testGEOMetadata() throws EngineException, IOException, ParseException{
+        log.info(">>> testGEOMetadata <<<");
         //first validate Media Resource Ontology
         UriRef hasLocation = new UriRef(NamespaceEnum.media+"hasLocation");
         UriRef locationLatitude = new UriRef(NamespaceEnum.media+"locationLatitude");
@@ -375,7 +398,8 @@ public class TikaEngineTest {
     
 
     
-    public void testMetadata() throws EngineException {
+    public void testMetadata() throws EngineException, ParseException {
+        log.info(">>> testMetadata <<<");
         ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -390,14 +414,15 @@ public class TikaEngineTest {
         verifyValue(ci, new UriRef(NamespaceEnum.media+"hasCreator"),null,"Test Artist");
     }
     @Test
-    public void testExifMetadata() throws EngineException {
+    public void testExifMetadata() throws EngineException, ParseException {
+        log.info(">>> testExifMetadata <<<");
         String exif = "http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
         ContentItem ci = createContentItem("testJPEG_EXIF.jpg", "image/jpeg");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
         verifyValue(ci, new UriRef(exif+"make"),null,"Canon");
         verifyValue(ci, new UriRef(exif+"software"),null,"Adobe Photoshop CS3 Macintosh");
-        verifyValue(ci, new UriRef(exif+"dateTimeOriginal"),XSD.dateTime,"2009-08-11T07:09:45Z");
+        verifyValue(ci, new UriRef(exif+"dateTimeOriginal"),XSD.dateTime,"2009-08-11T09:09:45");
         verifyValue(ci, new UriRef(exif+"relatedImageWidth"),XSD.int_,"100");
         verifyValue(ci, new UriRef(exif+"fNumber"),XSD.double_,"5.6");
         verifyValue(ci, new UriRef(exif+"model"),null,"Canon EOS 40D");
@@ -414,17 +439,18 @@ public class TikaEngineTest {
         verifyValue(ci, new UriRef(NamespaceEnum.media+"frameHeight"),XSD.int_,"68");
         verifyValue(ci, new UriRef(NamespaceEnum.media+"frameWidth"),XSD.int_,"100");
         verifyValue(ci, new UriRef(NamespaceEnum.media+"hasFormat"),null,"image/jpeg");
-        verifyValue(ci, new UriRef(NamespaceEnum.media+"creationDate"),XSD.dateTime,"2009-08-11T07:09:45Z");
+        verifyValue(ci, new UriRef(NamespaceEnum.media+"creationDate"),XSD.dateTime,"2009-08-11T09:09:45");
         verifyValues(ci, new UriRef(NamespaceEnum.media+"hasKeyword"),null,"serbor","moscow-birds","canon-55-250");
         //and finally the mapped DC properties
         verifyValue(ci, DC.format,null,"image/jpeg");
-        verifyValue(ci, DC.date,XSD.dateTime,"2009-08-11T07:09:45Z");
-        verifyValue(ci, new UriRef(NamespaceEnum.dc+"modified"),XSD.dateTime,"2009-10-02T21:02:49Z");
+        verifyValue(ci, DC.date,XSD.dateTime,"2009-08-11T09:09:45");
+        verifyValue(ci, new UriRef(NamespaceEnum.dc+"modified"),XSD.dateTime,"2009-10-02T23:02:49");
         verifyValues(ci, DC.subject, null, "serbor","moscow-birds","canon-55-250");
     }
     
     @Test
     public void testContentTypeDetection() throws EngineException, IOException {
+        log.info(">>> testContentTypeDetection <<<");
         ContentItem ci = createContentItem("test.pdf", OCTET_STREAM.toString());
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -456,6 +482,7 @@ public class TikaEngineTest {
      */
     @Test
     public void testText() throws EngineException {
+        log.info(">>> testText <<<");
         byte[] data = ("The Stanbol enhancer can " +
                 "detect famous cities such as Paris and people such as Bob " +
                 "Marley.").getBytes(Charset.forName("UTF-8"));
@@ -464,6 +491,7 @@ public class TikaEngineTest {
     }
     @Test
     public void testUnsupported() throws EngineException, IOException {
+        log.info(">>> testUnsupported <<<");
         ContentItem ci = createContentItem("test.pages", "application/x-iwork-pages-sffpages");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -473,11 +501,10 @@ public class TikaEngineTest {
         assertNull(contentPart);
         //only the original content
         assertEquals(1, ContentItemHelper.getContentParts(ci, Blob.class).size());
-        
-        
     }
     @Test
     public void testXhtml() throws EngineException, IOException {
+        log.info(">>> testXhtml <<<");
         ContentItem ci = createContentItem("test.xhtml", XHTML.toString()+"; charset=UTF-8");
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
@@ -573,10 +600,10 @@ public class TikaEngineTest {
         assertEquals(value,r);
         return (UriRef)r;
    }
-    private static Literal verifyValue(ContentItem ci, UriRef property, UriRef dataType, String lexValue){
+    private static Literal verifyValue(ContentItem ci, UriRef property, UriRef dataType, String lexValue) throws ParseException{
         return verifyValue(ci, ci.getUri(), property, dataType, lexValue);
     }
-    private static Literal verifyValue(ContentItem ci, NonLiteral subject, UriRef property, UriRef dataType, String lexValue){
+    private static Literal verifyValue(ContentItem ci, NonLiteral subject, UriRef property, UriRef dataType, String lexValue) throws ParseException{
         Iterator<Triple> it = ci.getMetadata().filter(subject,property, null);
         assertTrue(it.hasNext());
         Resource r = it.next().getObject();
@@ -587,7 +614,14 @@ public class TikaEngineTest {
             assertTrue(r instanceof TypedLiteral);
             assertEquals(dataType, ((TypedLiteral)r).getDataType());
         }
-        assertEquals(lexValue,((Literal)r).getLexicalForm());
+        //if we check dates and the lexical value is not UTC, then we need to
+        //consider the time zone of the host running this test
+        if(XSD.dateTime.equals(dataType) && lexValue.charAt(lexValue.length()-1) != 'Z'){
+            Date expectedDate = dateDefaultTimezone.parse(lexValue);
+            assertEquals(expectedDate, lf.createObject(Date.class, ((TypedLiteral)r)));
+        } else {
+            assertEquals(lexValue,((Literal)r).getLexicalForm());
+        }
         return (Literal)r;
     }
     private static Set<Literal> verifyValues(ContentItem ci, UriRef property, UriRef dataType, String...lexValues){

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/log4j.properties?rev=1296772&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/log4j.properties (added)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/log4j.properties Sun Mar  4 09:07:29 2012
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Root logger option
+log4j.rootLogger=INFO, stdout
+ 
+# Direct log messages to stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
+log4j.logger.org.apache.stanbol.enhancer.engines.tika=DEBUG
\ No newline at end of file

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/log4j.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain