You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2013/04/10 18:16:03 UTC
svn commit: r1466544 - in /jena/Experimental/jena-text: ./
src/main/java/examples/ src/main/java/org/apache/jena/query/text/
src/main/java/org/apache/jena/query/text/assembler/
Author: andy
Date: Wed Apr 10 16:16:03 2013
New Revision: 1466544
URL: http://svn.apache.org/r1466544
Log:
Simplify programmatic construction of text datasets.
Allow a Lucene directory name of "mem" for in-memory index.
Added:
jena/Experimental/jena-text/data.ttl
jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java
jena/Experimental/jena-text/text-config.ttl
Removed:
jena/Experimental/jena-text/text-query.mdtext
Modified:
jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
Added: jena/Experimental/jena-text/data.ttl
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/data.ttl?rev=1466544&view=auto
==============================================================================
--- jena/Experimental/jena-text/data.ttl (added)
+++ jena/Experimental/jena-text/data.ttl Wed Apr 10 16:16:03 2013
@@ -0,0 +1,10 @@
+@prefix : <http://example/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+:x1 rdfs:label "X1 word" .
+:x1 rdfs:label "X1 word" .
+:x2 rdfs:label "X2 word" .
+:x3 rdfs:label "X3 word" .
+:x1 rdfs:label "X9 word" .
\ No newline at end of file
Added: jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java?rev=1466544&view=auto
==============================================================================
--- jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java (added)
+++ jena/Experimental/jena-text/src/main/java/examples/JenaTextExample1.java Wed Apr 10 16:16:03 2013
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package examples;
+
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.atlas.logging.Log ;
+import org.apache.jena.query.text.EntityDefinition ;
+import org.apache.jena.query.text.TextDatasetFactory ;
+import org.apache.jena.query.text.TextQuery ;
+import org.apache.jena.riot.RDFDataMgr ;
+import org.apache.lucene.store.Directory ;
+import org.apache.lucene.store.RAMDirectory ;
+import org.slf4j.Logger ;
+import org.slf4j.LoggerFactory ;
+
+import com.hp.hpl.jena.query.* ;
+import com.hp.hpl.jena.rdf.model.Model ;
+import com.hp.hpl.jena.sparql.util.QueryExecUtils ;
+import com.hp.hpl.jena.vocabulary.RDFS ;
+
+/** Build a text search dataset, load a data file into it and run a text query over it. */
+public class JenaTextExample1
+{
+    static { Log.setLog4j() ; }
+    static Logger log = LoggerFactory.getLogger("JenaTextExample") ;
+
+    /** Build the text dataset in code, load "data.ttl" into it and run the example query. */
+    public static void main(String ... argv)
+    {
+        TextQuery.init();
+        // Alternative construction route: Dataset ds = createAssembler() ;
+        Dataset ds = createCode() ;
+        loadData(ds , "data.ttl") ;
+        queryData(ds) ;
+    }
+
+    /** Build a text dataset in code: in-memory base data joined with an in-memory Lucene index. */
+    public static Dataset createCode()
+    {
+        log.info("Construct an in-memory dataset with in-memory lucene index using code") ;
+        // Base data
+        Dataset ds1 = DatasetFactory.createMem() ;
+
+        // Define the index mapping
+        EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+
+        // Lucene, in memory.
+        Directory dir = new RAMDirectory();
+
+        // Join together into a dataset
+        return TextDatasetFactory.createLucene(ds1, dir, entDef) ;
+    }
+
+    /** Build the text dataset from the assembler description in "text-config.ttl". */
+    public static Dataset createAssembler()
+    {
+        log.info("Construct text dataset using an assembler description") ;
+        // There are two datasets in the configuration:
+        // the one for the base data and one with text index.
+        // Therefore we need to name the dataset we are interested in.
+        return DatasetFactory.assemble("text-config.ttl", "http://localhost/jena_example/#text_dataset") ;
+    }
+
+    /**
+     * Read RDF data into the dataset's default model inside a write transaction; logs the elapsed time.
+     * @param dataset the dataset to load into
+     * @param file    file name or URI of the RDF data to read
+     */
+    public static void loadData(Dataset dataset, String file)
+    {
+        log.info("Start loading") ;
+        long startTime = System.nanoTime() ;
+        dataset.begin(ReadWrite.WRITE) ;
+        try {
+            Model m = dataset.getDefaultModel() ;
+            // Read the file the caller asked for, not a hard-coded name.
+            RDFDataMgr.read(m, file) ;
+            dataset.commit() ;
+        } finally { dataset.end() ; }
+
+        long finishTime = System.nanoTime() ;
+        double time = (finishTime-startTime)/1.0e6 ;
+        log.info(String.format("Finish loading - %.2fms", time)) ;
+    }
+
+    /** Run a text:query for 'X1' on rdfs:label inside a read transaction; logs the elapsed time. */
+    public static void queryData(Dataset dataset)
+    {
+        log.info("START") ;
+        long startTime = System.nanoTime() ;
+        String pre = StrUtils.strjoinNL
+            ( "PREFIX : <http://example/>"
+            , "PREFIX text: <http://jena.apache.org/text#>"
+            , "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>") ;
+
+        String qs = StrUtils.strjoinNL
+            ( "SELECT * "
+            , " { ?s text:query (rdfs:label 'X1') ;"
+            , " rdfs:label ?label"
+            , " }") ;
+        dataset.begin(ReadWrite.READ) ;
+        try {
+            Query q = QueryFactory.create(pre+"\n"+qs) ;
+            QueryExecution qexec = QueryExecutionFactory.create(q , dataset) ;
+            try { QueryExecUtils.executeQuery(q, qexec) ; }
+            finally { qexec.close() ; }   // always release the query execution
+        } finally { dataset.end() ; }
+        long finishTime = System.nanoTime() ;
+        double time = (finishTime-startTime)/1.0e6 ;
+        log.info(String.format("FINISH - %.2fms", time)) ;
+    }
+}
+
Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java Wed Apr 10 16:16:03 2013
@@ -29,6 +29,8 @@ import com.hp.hpl.jena.sparql.core.assem
public class TextDatasetFactory
{
+ static { TextQuery.init(); }
+
/** Use an assembler file to build a dataset with text search capabilities */
public static Dataset create(String assemblerFile)
{
@@ -36,10 +38,10 @@ public class TextDatasetFactory
}
/** Create a text-indexed dataset */
- public static Dataset create(Dataset base, TextIndex index)
+ public static Dataset create(Dataset base, TextIndex textIndex)
{
DatasetGraph dsg = base.asDatasetGraph() ;
- dsg = create(dsg, index) ;
+ dsg = create(dsg, textIndex) ;
return DatasetFactory.create(dsg) ;
}
@@ -48,7 +50,10 @@ public class TextDatasetFactory
public static DatasetGraph create(DatasetGraph dsg, TextIndex textIndex)
{
TextDocProducer producer = new TextDocProducerTriples(textIndex.getDocDef(), textIndex) ;
- return new DatasetGraphText(dsg, textIndex, producer) ;
+ DatasetGraph dsgt = new DatasetGraphText(dsg, textIndex, producer) ;
+ dsgt.getContext().set(TextQuery.textIndex, textIndex) ;
+ return dsgt ;
+
}
/** Create a Lucene TextIndex */
Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java Wed Apr 10 16:16:03 2013
@@ -112,7 +112,7 @@ public class TextIndexLucene implements
try {
Document doc = doc(entity) ;
indexWriter.addDocument(doc) ;
- } catch (Exception e) { exception(e) ; }
+ } catch (IOException e) { exception(e) ; }
}
private Document doc(Entity entity)
Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java Wed Apr 10 16:16:03 2013
@@ -23,7 +23,6 @@ import static org.apache.jena.query.text
import static org.apache.jena.query.text.assembler.TextVocab.textDataset ;
import org.apache.jena.query.text.TextDatasetFactory ;
import org.apache.jena.query.text.TextIndex ;
-import org.apache.jena.query.text.TextQuery ;
import com.hp.hpl.jena.assembler.Assembler ;
import com.hp.hpl.jena.assembler.Mode ;
@@ -50,7 +49,6 @@ public class TextDatasetAssembler extend
@Override
public Dataset open(Assembler a, Resource root, Mode mode)
{
- //Log.info(TextDatasetAssembler.class, "Text dataset index") ;
Resource dataset = GraphUtils.getResourceValue(root, pDataset) ;
Resource index = GraphUtils.getResourceValue(root, pIndex) ;
@@ -58,7 +56,6 @@ public class TextDatasetAssembler extend
TextIndex textIndex = (TextIndex)a.open(index) ;
Dataset dst = TextDatasetFactory.create(ds, textIndex) ;
- dst.getContext().set(TextQuery.textIndex, textIndex) ;
return dst ;
}
Modified: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java?rev=1466544&r1=1466543&r2=1466544&view=diff
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java (original)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java Wed Apr 10 16:16:03 2013
@@ -26,16 +26,18 @@ import java.io.IOException ;
import org.apache.jena.atlas.io.IO ;
import org.apache.jena.atlas.lib.IRILib ;
-import org.apache.jena.atlas.logging.Log ;
import org.apache.jena.query.text.EntityDefinition ;
import org.apache.jena.query.text.TextDatasetFactory ;
import org.apache.jena.query.text.TextIndex ;
+import org.apache.jena.query.text.TextIndexException ;
import org.apache.lucene.store.Directory ;
import org.apache.lucene.store.FSDirectory ;
+import org.apache.lucene.store.RAMDirectory ;
import com.hp.hpl.jena.assembler.Assembler ;
import com.hp.hpl.jena.assembler.Mode ;
import com.hp.hpl.jena.assembler.assemblers.AssemblerBase ;
+import com.hp.hpl.jena.rdf.model.RDFNode ;
import com.hp.hpl.jena.rdf.model.Resource ;
import com.hp.hpl.jena.sparql.util.graph.GraphUtils ;
@@ -49,16 +51,30 @@ public class TextIndexLuceneAssembler ex
.
*/
+ @SuppressWarnings("resource")
@Override
public TextIndex open(Assembler a, Resource root, Mode mode)
{
try
{
- Resource x = GraphUtils.getResourceValue(root, pDirectory) ;
- String path = IRILib.IRIToFilename(x.getURI()) ;
- File dir = new File(path) ;
- Directory directory = FSDirectory.open(dir) ;
- Log.info(TextIndexLuceneAssembler.class, "Lucene text index : "+dir) ;
+ if ( ! GraphUtils.exactlyOneProperty(root, pDirectory) )
+ throw new TextIndexException("No 'text:directory' property on "+root) ;
+
+ Directory directory ;
+ RDFNode n = root.getProperty(pDirectory).getObject() ;
+ if ( n.isLiteral() )
+ {
+ if ( ! "mem".equals(n.asLiteral().getLexicalForm()) )
+ throw new TextIndexException("No 'text:directory' property on "+root+ " is a liteal and not \"mem\"") ;
+ directory = new RAMDirectory() ;
+ }
+ else
+ {
+ Resource x = n.asResource() ;
+ String path = IRILib.IRIToFilename(x.getURI()) ;
+ File dir = new File(path) ;
+ directory = FSDirectory.open(dir) ;
+ }
Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
EntityDefinition docDef = (EntityDefinition)a.open(r) ;
Added: jena/Experimental/jena-text/text-config.ttl
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/text-config.ttl?rev=1466544&view=auto
==============================================================================
--- jena/Experimental/jena-text/text-config.ttl (added)
+++ jena/Experimental/jena-text/text-config.ttl Wed Apr 10 16:16:03 2013
@@ -0,0 +1,47 @@
+ ## Example of a TDB dataset and text index
+
+@prefix : <http://localhost/jena_example/#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix tdb: <http://jena.hpl.hp.com/2008/tdb#> .
+@prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> .
+@prefix text: <http://jena.apache.org/text#> .
+
+# TDB
+[] ja:loadClass "com.hp.hpl.jena.tdb.TDB" .
+tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
+tdb:GraphTDB rdfs:subClassOf ja:Model .
+
+# Text
+[] ja:loadClass "org.apache.jena.query.text.TextQuery" .
+text:TextDataset rdfs:subClassOf ja:RDFDataset .
+#text:TextIndexSolr rdfs:subClassOf text:TextIndex .
+text:TextIndexLucene rdfs:subClassOf text:TextIndex .
+
+## ---------------------------------------------------------------
+## This URI must be fixed - it's used to assemble the text dataset.
+
+:text_dataset rdf:type text:TextDataset ;
+ text:dataset <#dataset> ;
+ ##text:index <#indexSolr> ;
+ text:index <#indexLucene> ;
+ .
+
+<#dataset> rdf:type tdb:DatasetTDB ;
+ tdb:location "--mem--" ;
+ tdb:unionDefaultGraph true ;
+ .
+
+<#indexLucene> a text:TextIndexLucene ;
+    #text:directory <file:Lucene> ;    # alternative: index held in a file-system directory
+    text:directory "mem" ;             # the literal "mem" selects an in-memory Lucene index
+    text:entityMap <#entMap> ;
+    .
+
+<#entMap> a text:EntityMap ;
+ text:entityField "uri" ;
+ text:defaultField "text" ; ## Must be defined in the text:maps
+ text:map (
+ # rdfs:label
+ [ text:field "text" ; text:predicate rdfs:label ]
+ ) .