You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/10/10 10:17:30 UTC

svn commit: r1396507 - in /stanbol/trunk/entityhub/indexing/source/jenatdb/src: main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/ test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/ test/resources/testConfigs/quads/ test/resou...

Author: rwesten
Date: Wed Oct 10 08:17:29 2012
New Revision: 1396507

URL: http://svn.apache.org/viewvc?rev=1396507&view=rev
Log:
STANBOL-764: added support for importing Quads

Added:
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexFieldConfig.txt
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexing.properties
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/rdfdata/
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/rdfdata/test.nq
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/testEntityIds.txt
Modified:
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java
    stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java

Modified: stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java?rev=1396507&r1=1396506&r2=1396507&view=diff
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java (original)
+++ stanbol/trunk/entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfResourceImporter.java Wed Oct 10 08:17:29 2012
@@ -30,9 +30,12 @@ import org.openjena.riot.RiotReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.hp.hpl.jena.graph.Node;
 import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.rdf.model.AnonId;
 import com.hp.hpl.jena.rdf.model.Model;
 import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.sparql.core.Quad;
 import com.hp.hpl.jena.tdb.TDBLoader;
 import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
 import com.hp.hpl.jena.tdb.store.bulkloader.BulkLoader;
@@ -70,6 +73,13 @@ public class RdfResourceImporter impleme
             return ResourceState.IGNORED;
         } else if (format == Lang.NTRIPLES) {
             TDBLoader.load(indexingDataset, is, true);
+        } else if(format == Lang.NQUADS || format == Lang.TRIG){ //quads
+            TDBLoader loader = new TDBLoader();
+            loader.setShowProgress(true);
+            Destination<Quad> dest = createQuad2TripleDestination();
+            dest.start();
+            RiotReader.parseQuads(is,format,null, dest);
+            dest.finish();
         } else if (format != Lang.RDFXML) {
             // use RIOT to parse the format but with a special configuration
             // RiotReader!
@@ -129,4 +139,42 @@ public class RdfResourceImporter impleme
         } ;
         return sink ;
     }
+    /**
+     * Creates a Destination that consumes {@link Quad}s and loads them as {@link Triple}s into the
+     * triple table of the {@link #indexingDataset}. The graph of a Quad is dropped: only s, p, o are loaded.
+     * @return the Destination; callers invoke start() before sending Quads and finish() afterwards
+     */
+    private Destination<Quad> createQuad2TripleDestination() {
+        LoadMonitor monitor = new LoadMonitor(indexingDataset, 
+            log, "triples",50000,100000);
+        final LoaderNodeTupleTable loaderTriples = new LoaderNodeTupleTable(
+            indexingDataset.getTripleTable().getNodeTupleTable(), "triples", monitor) ;
+
+        Destination<Quad> sink = new Destination<Quad>() {
+            // start() opens the load and its data phase; finish() closes data and index phases
+            public final void start()
+            {
+                loaderTriples.loadStart() ;
+                loaderTriples.loadDataStart() ;
+            }
+            public final void send(Quad quad)
+            {
+                loaderTriples.load(quad.getSubject(), quad.getPredicate(), quad.getObject()) ;
+                // quad.getGraph() is intentionally not used here
+            }
+
+            public final void flush() { }
+            public void close() { }
+
+            public final void finish()
+            {
+                loaderTriples.loadDataFinish() ;
+                loaderTriples.loadIndexStart() ;
+                loaderTriples.loadIndexFinish() ;
+                loaderTriples.loadFinish() ;
+            }
+
+        } ;
+        return sink ;
+    }
 }

Modified: stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java?rev=1396507&r1=1396506&r2=1396507&view=diff
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java (original)
+++ stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSourceTest.java Wed Oct 10 08:17:29 2012
@@ -131,6 +131,37 @@ public class RdfIndexingSourceTest {
             NUMBER_OF_ENTITIES_EXPECTED,count), 
             NUMBER_OF_ENTITIES_EXPECTED, count);
     }
+    /**
+     * Tests Quad support (STANBOL-764): loads the "quads" test config and validates the data of every listed entity
+     */
+    @Test
+    public void testQuadsImport(){
+        IndexingConfig config = new IndexingConfig(CONFIG_ROOT+"quads",CONFIG_ROOT+"quads"){};
+        EntityIterator entityIdIterator = config.getEntityIdIterator();
+        assertNotNull("Unable to perform test whithout EntityIterator",entityIdIterator);
+        if(entityIdIterator.needsInitialisation()){
+            entityIdIterator.initialise();
+        }
+        EntityDataProvider dataProvider = config.getEntityDataProvider();
+        assertNotNull(dataProvider);
+        assertTrue(dataProvider.needsInitialisation());//there are test data to load
+        dataProvider.initialise();
+        assertEquals(dataProvider.getClass(), RdfIndexingSource.class);
+        long count = 0;
+        while(entityIdIterator.hasNext()){
+            EntityScore entityScore = entityIdIterator.next();
+            assertNotNull(entityScore);
+            assertNotNull(entityScore.id);
+            validateRepresentation(dataProvider.getEntityData(entityScore.id),
+                entityScore.id);
+            count++;
+        }
+        //check that all 9 entities were imported to the default dataset
+        // (and not to named graphs)
+        assertEquals(String.format("%s Entities expected but %s processed!",
+            9, count), 
+            9, count);
+    }
 
     /**
      * @param it

Added: stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexFieldConfig.txt
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexFieldConfig.txt?rev=1396507&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexFieldConfig.txt (added)
+++ stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexFieldConfig.txt Wed Oct 10 08:17:29 2012
@@ -0,0 +1 @@
+#This is the default config that would index everything

Added: stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexing.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexing.properties?rev=1396507&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexing.properties (added)
+++ stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/config/indexing.properties Wed Oct 10 08:17:29 2012
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name=simple
+description=Simple Configuration
+
+# use the RDF indexing source as EntityDataProvider
+entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata
+
+# used in the tests to provide the IDs of the Entities in the test data
+entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,source:testEntityIds.txt,charset:UTF-8,encodeIds:false
+

Added: stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/rdfdata/test.nq
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/rdfdata/test.nq?rev=1396507&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/rdfdata/test.nq (added)
+++ stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/rdfdata/test.nq Wed Oct 10 08:17:29 2012
@@ -0,0 +1,48 @@
+<http://www.telemac0.net/marketing-50/> <http://purl.org/dc/elements/1.1/type> <http://purl.org/dc/dcmitype/Text> <http://www.telemac0.net/marketing-50/>   .
+<http://www.telemac0.net/marketing-50/> <http://purl.org/dc/elements/1.1/title> "telemac0" <http://www.telemac0.net/marketing-50/>   .
+<http://www.telemac0.net/marketing-50/> <http://creativecommons.org/ns#attributionURL> <http://telemac0.net> <http://www.telemac0.net/marketing-50/>   .
+<http://www.telemac0.net/marketing-50/> <http://creativecommons.org/ns#attributionName> "cyb" <http://www.telemac0.net/marketing-50/>   .
+<http://www.telemac0.net/marketing-50/> <http://purl.org/dc/elements/1.1/source> <http://telemac0.net> <http://www.telemac0.net/marketing-50/>   .
+<http://zorg.com.ru/2008/06/30/opyat-nakurilsya/> <http://purl.org/dc/elements/1.1/type> <http://purl.org/dc/dcmitype/InteractiveResource> <http://zorg.com.ru/2008/06/30/opyat-nakurilsya/>   .
+<http://zorg.com.ru/2008/06/30/opyat-nakurilsya/> <http://purl.org/dc/elements/1.1/title> "Blogg Texts" <http://zorg.com.ru/2008/06/30/opyat-nakurilsya/>   .
+<http://zorg.com.ru/2008/06/30/opyat-nakurilsya/> <http://creativecommons.org/ns#attributionURL> <http://zorg.com.ru/> <http://zorg.com.ru/2008/06/30/opyat-nakurilsya/>   .
+<http://zorg.com.ru/2008/06/30/opyat-nakurilsya/> <http://creativecommons.org/ns#attributionName> "Sergey Agarkov" <http://zorg.com.ru/2008/06/30/opyat-nakurilsya/>   .
+<http://zorg.com.ru/2008/06/30/opyat-nakurilsya/> <http://purl.org/dc/elements/1.1/source> <http://zorg.com.ru/> <http://zorg.com.ru/2008/06/30/opyat-nakurilsya/>   .
+<http://www.logosportswear.com/product/794> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rdf.data-vocabulary.org/#Product> <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/794> <http://rdf.data-vocabulary.org/#photo> ""@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/794> <http://rdf.data-vocabulary.org/#url> "Colored Handle Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/794> <http://rdf.data-vocabulary.org/#name> "Colored Handle Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/794> <http://rdf.data-vocabulary.org/#price> "12.51"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/158> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rdf.data-vocabulary.org/#Product> <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/158> <http://rdf.data-vocabulary.org/#photo> ""@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/158> <http://rdf.data-vocabulary.org/#url> "All Purpose Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/158> <http://rdf.data-vocabulary.org/#name> "All Purpose Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/158> <http://rdf.data-vocabulary.org/#price> "16.06"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1531> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rdf.data-vocabulary.org/#Product> <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1531> <http://rdf.data-vocabulary.org/#photo> ""@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1531> <http://rdf.data-vocabulary.org/#url> "Harbor Cruise Boat Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1531> <http://rdf.data-vocabulary.org/#name> "Harbor Cruise Boat Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1531> <http://rdf.data-vocabulary.org/#price> "19.47"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/14109> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rdf.data-vocabulary.org/#Product> <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/14109> <http://rdf.data-vocabulary.org/#photo> ""@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/14109> <http://rdf.data-vocabulary.org/#url> "Contemporary Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/14109> <http://rdf.data-vocabulary.org/#name> "Contemporary Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/14109> <http://rdf.data-vocabulary.org/#price> "16.47"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/4373> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rdf.data-vocabulary.org/#Product> <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/4373> <http://rdf.data-vocabulary.org/#photo> ""@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/4373> <http://rdf.data-vocabulary.org/#url> "Zippered Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/4373> <http://rdf.data-vocabulary.org/#name> "Zippered Tote"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/4373> <http://rdf.data-vocabulary.org/#price> "14.92"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1337> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rdf.data-vocabulary.org/#Product> <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1337> <http://rdf.data-vocabulary.org/#photo> ""@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1337> <http://rdf.data-vocabulary.org/#url> "Ensigns Boat Bag"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1337> <http://rdf.data-vocabulary.org/#name> "Ensigns Boat Bag"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/1337> <http://rdf.data-vocabulary.org/#price> "15.93"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/11710> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rdf.data-vocabulary.org/#Product> <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/11710> <http://rdf.data-vocabulary.org/#photo> ""@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/11710> <http://rdf.data-vocabulary.org/#url> "New ! Tees and Totes Combination Package"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/11710> <http://rdf.data-vocabulary.org/#name> "New ! Tees and Totes Combination Package"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+<http://www.logosportswear.com/product/11710> <http://rdf.data-vocabulary.org/#price> "22.95"@en <http://www.logosportswear.com/imprintedtotebagspersonalized.php>   .
+_:node16nh2jpdcx57591272 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Agent> <http://www.huffingtonpost.com/2009/01/13/obama-issues-first-veto-t_n_157585.html>   .
+_:node16nh2jpdcx57591272 <http://xmlns.com/foaf/0.1/homepage> <http://www.huffingtonpost.com/news/barack-obama/> <http://www.huffingtonpost.com/2009/01/13/obama-issues-first-veto-t_n_157585.html>   .
+_:node16nh2jpdcx57591272 <http://xmlns.com/foaf/0.1/name> "Barack Obama"@en <http://www.huffingtonpost.com/2009/01/13/obama-issues-first-veto-t_n_157585.html>   .

Added: stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/testEntityIds.txt
URL: http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/testEntityIds.txt?rev=1396507&view=auto
==============================================================================
--- stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/testEntityIds.txt (added)
+++ stanbol/trunk/entityhub/indexing/source/jenatdb/src/test/resources/testConfigs/quads/indexing/resources/testEntityIds.txt Wed Oct 10 08:17:29 2012
@@ -0,0 +1,9 @@
+http://www.telemac0.net/marketing-50/	100
+http://zorg.com.ru/2008/06/30/opyat-nakurilsya/	99
+http://www.logosportswear.com/product/794	99
+http://www.logosportswear.com/product/158	99
+http://www.logosportswear.com/product/1531	99
+http://www.logosportswear.com/product/14109	99
+http://www.logosportswear.com/product/4373	99
+http://www.logosportswear.com/product/1337	99
+http://www.logosportswear.com/product/11710	99
\ No newline at end of file