You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2013/04/10 18:16:03 UTC

svn commit: r1466544 - in /jena/Experimental/jena-text: ./ src/main/java/examples/ src/main/java/org/apache/jena/query/text/ src/main/java/org/apache/jena/query/text/assembler/

Author: andy
Date: Wed Apr 10 16:16:03 2013
New Revision: 1466544

URL: http://svn.apache.org/r1466544
Log:
Simplify programmatic construction of text datasets.
Allow a Lucene directory name of "mem" for an in-memory index.


Added:
    jena/Experimental/jena-text/data.ttl
    jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java
    jena/Experimental/jena-text/text-config.ttl
Removed:
    jena/Experimental/jena-text/text-query.mdtext
Modified:
    jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
    jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
    jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
    jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java

Added: jena/Experimental/jena-text/data.ttl
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/data.ttl?rev=1466544&view=auto
==============================================================================
--- jena/Experimental/jena-text/data.ttl (added)
+++ jena/Experimental/jena-text/data.ttl Wed Apr 10 16:16:03 2013
@@ -0,0 +1,10 @@
+@prefix :        <http://example/> .
+@prefix xsd:     <http://www.w3.org/2001/XMLSchema#> .
+@prefix rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
+
+:x1 rdfs:label "X1 word" .
+:x1 rdfs:label "X1 word" .
+:x2 rdfs:label "X2 word" .
+:x3 rdfs:label "X3 word" .
+:x1 rdfs:label "X9 word" .
\ No newline at end of file

Added: jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java?rev=1466544&view=auto
==============================================================================
--- jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java (added)
+++ jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java Wed Apr 10 16:16:03 2013
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package examples;
+
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.atlas.logging.Log ;
+import org.apache.jena.query.text.EntityDefinition ;
+import org.apache.jena.query.text.TextDatasetFactory ;
+import org.apache.jena.query.text.TextQuery ;
+import org.apache.jena.riot.RDFDataMgr ;
+import org.apache.lucene.store.Directory ;
+import org.apache.lucene.store.RAMDirectory ;
+import org.slf4j.Logger ;
+import org.slf4j.LoggerFactory ;
+
+import com.hp.hpl.jena.query.* ;
+import com.hp.hpl.jena.rdf.model.Model ;
+import com.hp.hpl.jena.sparql.util.QueryExecUtils ;
+import com.hp.hpl.jena.vocabulary.RDFS ;
+
+/** Build a text search dataset */
+public class JenaTextExample1
+{
+    static { Log.setLog4j() ; }
+    static Logger log = LoggerFactory.getLogger("JenaTextExample") ;
+    
+    /** Build a text dataset, load "data.ttl" into it, then run a text query against it. */
+    public static void main(String ... argv)
+    {
+        TextQuery.init();
+        Dataset ds = createCode() ;
+        //Dataset ds = createAssembler() ;
+        loadData(ds , "data.ttl") ;
+        queryData(ds) ;
+    }
+    
+    /** Build the text dataset in code: in-memory base data joined with an in-memory Lucene index. */
+    public static Dataset createCode() 
+    {
+        log.info("Construct an in-memory dataset with in-memory lucene index using code") ;
+        // Build a text dataset by code.
+        // Here, in-memory base data and in-memory Lucene index
+
+        // Base data
+        Dataset ds1 = DatasetFactory.createMem() ; 
+
+        // Define the index mapping 
+        EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+
+        // Lucene, in memory.
+        Directory dir =  new RAMDirectory();
+        
+        // Join together into a dataset
+        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef) ;
+        return ds ;
+    }
+
+    /** Build the text dataset from the assembler description in "text-config.ttl". */
+    public static Dataset createAssembler() 
+    {
+        log.info("Construct text dataset using an assembler description") ;
+        // There are two datasets in the configuration:
+        // the one for the base data and one with text index.
+        // Therefore we need to name the dataset we are interested in. 
+        Dataset ds = DatasetFactory.assemble("text-config.ttl", "http://localhost/jena_example/#text_dataset") ;
+        return ds ;
+    }
+    
+    /** Load the RDF file {@code file} into the default model of {@code dataset} in a write transaction. */
+    public static void loadData(Dataset dataset, String file)
+    {
+        log.info("Start loading") ;
+        long startTime = System.nanoTime() ;
+        dataset.begin(ReadWrite.WRITE) ;
+        try {
+            Model m = dataset.getDefaultModel() ;
+            // Honour the caller-supplied file name rather than a hard-coded one.
+            RDFDataMgr.read(m, file) ;
+            dataset.commit() ;
+        } finally { dataset.end() ; }
+        
+        long finishTime = System.nanoTime() ;
+        double time = (finishTime-startTime)/1.0e6 ;
+        log.info(String.format("Finish loading - %.2fms", time)) ;
+    }
+
+    /** Run a text:query for 'X1' over rdfs:label in a read transaction and log the elapsed time. */
+    public static void queryData(Dataset dataset)
+    {
+        log.info("START") ;
+        long startTime = System.nanoTime() ;
+        String pre = StrUtils.strjoinNL
+            ( "PREFIX : <http://example/>"
+            , "PREFIX text: <http://jena.apache.org/text#>"
+            , "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>") ;
+        
+        String qs = StrUtils.strjoinNL
+            ( "SELECT * "
+            , " { ?s text:query (rdfs:label 'X1') ;"
+            , "      rdfs:label ?label"
+            , " }") ; 
+        dataset.begin(ReadWrite.READ) ;
+        try {
+            Query q = QueryFactory.create(pre+"\n"+qs) ;
+            QueryExecution qexec = QueryExecutionFactory.create(q , dataset) ;
+            QueryExecUtils.executeQuery(q, qexec) ;
+        } finally { dataset.end() ; }
+        long finishTime = System.nanoTime() ;
+        double time = (finishTime-startTime)/1.0e6 ;
+        log.info(String.format("FINISH - %.2fms", time)) ;
+    }
+}
+

Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java Wed Apr 10 16:16:03 2013
@@ -29,6 +29,8 @@ import com.hp.hpl.jena.sparql.core.assem
 
 public class TextDatasetFactory
 {
+    static { TextQuery.init(); }
+    
     /** Use an assembler file to build a dataset with text search capabilities */ 
     public static Dataset create(String assemblerFile)
     {
@@ -36,10 +38,10 @@ public class TextDatasetFactory
     }
 
     /** Create a text-indexed dataset */ 
-    public static Dataset create(Dataset base, TextIndex index)
+    public static Dataset create(Dataset base, TextIndex textIndex)
     {
         DatasetGraph dsg = base.asDatasetGraph() ;
-        dsg = create(dsg, index) ;
+        dsg = create(dsg, textIndex) ;
         return DatasetFactory.create(dsg) ;
     }
 
@@ -48,7 +50,10 @@ public class TextDatasetFactory
     public static DatasetGraph create(DatasetGraph dsg, TextIndex textIndex)
     {
         TextDocProducer producer = new TextDocProducerTriples(textIndex.getDocDef(), textIndex) ;
-        return new DatasetGraphText(dsg, textIndex, producer) ;
+        DatasetGraph dsgt = new DatasetGraphText(dsg, textIndex, producer) ;
+        dsgt.getContext().set(TextQuery.textIndex, textIndex) ;
+        return dsgt ;
+
     }
     
     /** Create a Lucene TextIndex */ 

Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java Wed Apr 10 16:16:03 2013
@@ -112,7 +112,7 @@ public class TextIndexLucene implements 
         try {
             Document doc = doc(entity) ;
             indexWriter.addDocument(doc) ;
-        } catch (Exception e) { exception(e) ; }
+        } catch (IOException e) { exception(e) ; }
     }
 
     private Document doc(Entity entity)

Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java Wed Apr 10 16:16:03 2013
@@ -23,7 +23,6 @@ import static org.apache.jena.query.text
 import static org.apache.jena.query.text.assembler.TextVocab.textDataset ;
 import org.apache.jena.query.text.TextDatasetFactory ;
 import org.apache.jena.query.text.TextIndex ;
-import org.apache.jena.query.text.TextQuery ;
 
 import com.hp.hpl.jena.assembler.Assembler ;
 import com.hp.hpl.jena.assembler.Mode ;
@@ -50,7 +49,6 @@ public class TextDatasetAssembler extend
     @Override
     public Dataset open(Assembler a, Resource root, Mode mode)
     {
-        //Log.info(TextDatasetAssembler.class, "Text dataset index") ;
         Resource dataset = GraphUtils.getResourceValue(root, pDataset) ;
         Resource index   = GraphUtils.getResourceValue(root, pIndex) ;
         
@@ -58,7 +56,6 @@ public class TextDatasetAssembler extend
         TextIndex textIndex = (TextIndex)a.open(index) ;
         
         Dataset dst = TextDatasetFactory.create(ds, textIndex) ;
-        dst.getContext().set(TextQuery.textIndex, textIndex) ;
         return dst ;
         
     }

Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java Wed Apr 10 16:16:03 2013
@@ -26,16 +26,18 @@ import java.io.IOException ;
 
 import org.apache.jena.atlas.io.IO ;
 import org.apache.jena.atlas.lib.IRILib ;
-import org.apache.jena.atlas.logging.Log ;
 import org.apache.jena.query.text.EntityDefinition ;
 import org.apache.jena.query.text.TextDatasetFactory ;
 import org.apache.jena.query.text.TextIndex ;
+import org.apache.jena.query.text.TextIndexException ;
 import org.apache.lucene.store.Directory ;
 import org.apache.lucene.store.FSDirectory ;
+import org.apache.lucene.store.RAMDirectory ;
 
 import com.hp.hpl.jena.assembler.Assembler ;
 import com.hp.hpl.jena.assembler.Mode ;
 import com.hp.hpl.jena.assembler.assemblers.AssemblerBase ;
+import com.hp.hpl.jena.rdf.model.RDFNode ;
 import com.hp.hpl.jena.rdf.model.Resource ;
 import com.hp.hpl.jena.sparql.util.graph.GraphUtils ;
 
@@ -49,16 +51,30 @@ public class TextIndexLuceneAssembler ex
         .
     */
 
+    @SuppressWarnings("resource")
     @Override
     public TextIndex open(Assembler a, Resource root, Mode mode)
     {
         try
         {
-            Resource x = GraphUtils.getResourceValue(root, pDirectory) ;
-            String path = IRILib.IRIToFilename(x.getURI()) ; 
-            File dir = new File(path) ; 
-            Directory directory = FSDirectory.open(dir) ;
-            Log.info(TextIndexLuceneAssembler.class, "Lucene text index : "+dir) ;
+            if ( ! GraphUtils.exactlyOneProperty(root, pDirectory) )
+                throw new TextIndexException("No 'text:directory' property on "+root) ;
+            
+            Directory directory ;
+            RDFNode n = root.getProperty(pDirectory).getObject() ;
+            if ( n.isLiteral() )
+            {
+                if ( ! "mem".equals(n.asLiteral().getLexicalForm()) )
+                    throw new TextIndexException("No 'text:directory' property on "+root+ " is a liteal and not \"mem\"") ;
+                 directory = new RAMDirectory() ;
+            }
+            else
+            { 
+                Resource x = n.asResource() ;
+                String path = IRILib.IRIToFilename(x.getURI()) ; 
+                File dir = new File(path) ; 
+                directory = FSDirectory.open(dir) ;
+            }
         
             Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
             EntityDefinition docDef = (EntityDefinition)a.open(r) ; 

Added: jena/Experimental/jena-text/text-config.ttl
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/text-config.ttl?rev=1466544&view=auto
==============================================================================
--- jena/Experimental/jena-text/text-config.ttl (added)
+++ jena/Experimental/jena-text/text-config.ttl Wed Apr 10 16:16:03 2013
@@ -0,0 +1,47 @@
+ ## Example of a TDB dataset and text index
+
+@prefix :        <http://localhost/jena_example/#> .
+@prefix rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix tdb:     <http://jena.hpl.hp.com/2008/tdb#> .
+@prefix ja:      <http://jena.hpl.hp.com/2005/11/Assembler#> .
+@prefix text:    <http://jena.apache.org/text#> .
+
+# TDB
+[] ja:loadClass "com.hp.hpl.jena.tdb.TDB" .
+tdb:DatasetTDB  rdfs:subClassOf  ja:RDFDataset .
+tdb:GraphTDB    rdfs:subClassOf  ja:Model .
+
+# Text
+[] ja:loadClass "org.apache.jena.query.text.TextQuery" .
+text:TextDataset      rdfs:subClassOf   ja:RDFDataset .
+#text:TextIndexSolr    rdfs:subClassOf   text:TextIndex .
+text:TextIndexLucene  rdfs:subClassOf   text:TextIndex .
+
+## ---------------------------------------------------------------
+## This URI must stay fixed (unchanged) - the example code names it to assemble the text dataset.
+
+:text_dataset rdf:type     text:TextDataset ;
+    text:dataset   <#dataset> ;       ## base data: the <#dataset> description below
+    ##text:index   <#indexSolr> ;
+    text:index     <#indexLucene> ;   ## text index: the <#indexLucene> description below
+    .
+
+<#dataset> rdf:type      tdb:DatasetTDB ;
+    tdb:location "--mem--" ;          ## presumably TDB's in-memory location name - TODO confirm
+    tdb:unionDefaultGraph true ;
+    .
+
+<#indexLucene> a text:TextIndexLucene ;
+    #text:directory <file:Lucene> ;   ## alternative: resource naming an on-disk index directory
+    text:directory "mem" ;            ## the literal "mem" selects an in-memory Lucene index
+    text:entityMap <#entMap> ;
+    .
+
+<#entMap> a text:EntityMap ;
+    text:entityField      "uri" ;
+    text:defaultField     "text" ; ## Must name a field defined in the text:map list below
+    text:map (
+         # rdfs:label            
+         [ text:field "text" ; text:predicate rdfs:label ]
+         ) .