You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by sa...@apache.org on 2015/02/25 10:33:37 UTC
jena git commit: Add option to specify an analyzer for the query
string, and add mechanism for defining custom TextDocProducers
Repository: jena
Updated Branches:
refs/heads/master 59cb7f3b6 -> f52f75014
Add option to specify an analyzer for the query string, and add mechanism for defining custom TextDocProducers
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/f52f7501
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/f52f7501
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/f52f7501
Branch: refs/heads/master
Commit: f52f7501482f99312332e931aa7313957019b75a
Parents: 59cb7f3
Author: Stephen Allen <sa...@apache.org>
Authored: Wed Feb 25 04:32:55 2015 -0500
Committer: Stephen Allen <sa...@apache.org>
Committed: Wed Feb 25 04:32:55 2015 -0500
----------------------------------------------------------------------
jena-text/ReleaseNotes.txt | 2 +
.../main/java/examples/JenaTextExample1.java | 2 +-
jena-text/src/main/java/jena/textindexdump.java | 2 +-
.../jena/query/text/TextDatasetFactory.java | 54 ++++++++++++---
.../apache/jena/query/text/TextDocProducer.java | 6 ++
.../query/text/TextDocProducerEntities.java | 4 +-
.../jena/query/text/TextDocProducerTriples.java | 4 +-
.../apache/jena/query/text/TextIndexLucene.java | 13 +++-
.../text/assembler/TextDatasetAssembler.java | 31 +++++++--
.../assembler/TextIndexLuceneAssembler.java | 16 ++++-
.../jena/query/text/assembler/TextVocab.java | 2 +
...ractTestDatasetWithLuceneGraphTextIndex.java | 2 +-
.../jena/query/text/TestBuildTextDataset.java | 2 +-
.../text/TestLuceneWithMultipleThreads.java | 6 +-
.../org/apache/jena/query/text/TestTextTDB.java | 2 +-
.../assembler/AbstractTestTextAssembler.java | 23 ++++++-
.../assembler/TestTextDatasetAssembler.java | 72 ++++++++++++++------
.../assembler/TestTextIndexLuceneAssembler.java | 12 ++++
18 files changed, 202 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/ReleaseNotes.txt
----------------------------------------------------------------------
diff --git a/jena-text/ReleaseNotes.txt b/jena-text/ReleaseNotes.txt
index 5645f55..bd27f64 100644
--- a/jena-text/ReleaseNotes.txt
+++ b/jena-text/ReleaseNotes.txt
@@ -4,5 +4,7 @@ ChangeLog for jena-text
==== jena-text 1.1.2
+ JENA-848 : Fix concurrency control for in-process Lucene index
++ Add option to specify an analyzer for the query string
++ Add mechanism for defining custom TextDocProducers
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/examples/JenaTextExample1.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/examples/JenaTextExample1.java b/jena-text/src/main/java/examples/JenaTextExample1.java
index e11d8be..631a096 100644
--- a/jena-text/src/main/java/examples/JenaTextExample1.java
+++ b/jena-text/src/main/java/examples/JenaTextExample1.java
@@ -65,7 +65,7 @@ public class JenaTextExample1
Directory dir = new RAMDirectory();
// Join together into a dataset
- Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef) ;
+ Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
return ds ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/jena/textindexdump.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/jena/textindexdump.java b/jena-text/src/main/java/jena/textindexdump.java
index 773edca..6aceb16 100644
--- a/jena-text/src/main/java/jena/textindexdump.java
+++ b/jena-text/src/main/java/jena/textindexdump.java
@@ -100,7 +100,7 @@ public class textindexdump extends CmdARQ {
private static void dump(TextIndexLucene textIndex) {
try {
Directory directory = textIndex.getDirectory() ;
- Analyzer analyzer = textIndex.getAnalyzer() ;
+ Analyzer analyzer = textIndex.getQueryAnalyzer() ;
IndexReader indexReader = DirectoryReader.open(directory) ;
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
QueryParser queryParser = new QueryParser(TextIndexLucene.VER, textIndex.getDocDef().getPrimaryField(), analyzer);
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index f6ab21b..c7e4208 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -19,6 +19,7 @@
package org.apache.jena.query.text;
import org.apache.jena.query.text.assembler.TextVocab ;
+import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.store.Directory ;
import org.apache.solr.client.solrj.SolrServer ;
@@ -51,6 +52,14 @@ public class TextDatasetFactory
dsg = create(dsg, textIndex, closeIndexOnDSGClose) ;
return DatasetFactory.create(dsg) ;
}
+
+ /** Create a text-indexed dataset that uses the given TextDocProducer (null selects the default producer), optionally allowing the text index to be closed if the Dataset is */
+ public static Dataset create(Dataset base, TextIndex textIndex, boolean closeIndexOnDSGClose, TextDocProducer producer)
+ {
+ DatasetGraph dsg = base.asDatasetGraph() ;
+ dsg = create(dsg, textIndex, closeIndexOnDSGClose, producer) ;
+ return DatasetFactory.create(dsg) ;
+ }
/** Create a text-indexed DatasetGraph */
@@ -62,7 +71,12 @@ public class TextDatasetFactory
/** Create a text-indexed DatasetGraph, optionally allowing the text index to be closed if the DatasetGraph is */
public static DatasetGraph create(DatasetGraph dsg, TextIndex textIndex, boolean closeIndexOnDSGClose)
{
- TextDocProducer producer = new TextDocProducerTriples(textIndex.getDocDef(), textIndex) ;
+ return create(dsg, textIndex, closeIndexOnDSGClose, null);
+ }
+
+ /** Create a text-indexed DatasetGraph that uses the given TextDocProducer (null selects the default TextDocProducerTriples), optionally allowing the text index to be closed if the DatasetGraph is */
+ public static DatasetGraph create(DatasetGraph dsg, TextIndex textIndex, boolean closeIndexOnDSGClose, TextDocProducer producer) {
+ if (producer == null) producer = new TextDocProducerTriples(textIndex) ;
DatasetGraph dsgt = new DatasetGraphText(dsg, textIndex, producer, closeIndexOnDSGClose) ;
// Also set on dsg
Context c = dsgt.getContext() ;
@@ -71,24 +85,44 @@ public class TextDatasetFactory
return dsgt ;
}
- /** Create a Lucene TextIndex */
- public static TextIndex createLuceneIndex(Directory directory, EntityDefinition entMap)
+ /**
+ * Create a Lucene TextIndex
+ *
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ */
+ public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
{
- TextIndex index = new TextIndexLucene(directory, entMap) ;
+ TextIndex index = new TextIndexLucene(directory, def, queryAnalyzer) ;
return index ;
}
- /** Create a text-indexed dataset, using Lucene */
- public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition entMap)
+ /**
+ * Create a text-indexed dataset, using Lucene
+ *
+ * @param base the base Dataset
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ */
+ public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
{
- TextIndex index = createLuceneIndex(directory, entMap) ;
+ TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
return create(base, index, true) ;
}
- /** Create a text-indexed dataset, using Lucene */
- public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition entMap)
+ /**
+ * Create a text-indexed dataset, using Lucene
+ *
+ * @param base the base DatasetGraph
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ */
+ public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
{
- TextIndex index = createLuceneIndex(directory, entMap) ;
+ TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
return create(base, index, true) ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
index 8e74af7..afb522f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
@@ -20,6 +20,12 @@ package org.apache.jena.query.text;
import com.hp.hpl.jena.sparql.core.DatasetChanges ;
+/**
+ * Responsible for monitoring changes on a dataset and updating the TextIndex appropriately.
+ * <p>
+ * <strong>Note:</strong> If an implementation is to be instantiated dynamically via an Assembler, it must
+ * declare a public constructor that accepts a single parameter of type {@link org.apache.jena.query.text.TextIndex}.
+ */
public interface TextDocProducer extends DatasetChanges
{
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
index 9e2de8d..4427de6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
@@ -40,8 +40,8 @@ public class TextDocProducerEntities extends DatasetChangesBatched implements Te
// therefore whether or not we have to do autocommit
private final ThreadLocal<Boolean> inTransaction = new ThreadLocal<Boolean>() ;
- public TextDocProducerEntities(EntityDefinition defn, TextIndex indexer) {
- this.defn = defn ;
+ public TextDocProducerEntities(TextIndex indexer) {
+ this.defn = indexer.getDocDef() ;
this.indexer = indexer ;
inTransaction.set(false) ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
index 3700eb0..30da166 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
@@ -34,8 +34,8 @@ public class TextDocProducerTriples implements TextDocProducer {
private final ThreadLocal<Boolean> inTransaction = new ThreadLocal<Boolean>() ;
- public TextDocProducerTriples(EntityDefinition defn, TextIndex indexer) {
- this.defn = defn ;
+ public TextDocProducerTriples(TextIndex indexer) {
+ this.defn = indexer.getDocDef() ;
this.indexer = indexer ;
inTransaction.set(false) ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 36d4050..12ff1ea 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -75,6 +75,7 @@ public class TextIndexLucene implements TextIndex {
private final EntityDefinition docDef ;
private final Directory directory ;
private final Analyzer analyzer ;
+ private final Analyzer queryAnalyzer ;
// The IndexWriter can't be final because we may have to recreate it if rollback() is called.
// However, it needs to be volatile in case the next write transaction is on a different thread,
@@ -87,8 +88,9 @@ public class TextIndexLucene implements TextIndex {
*
* @param directory The Lucene Directory for the index
* @param def The EntityDefinition that defines how entities are stored in the index
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
*/
- public TextIndexLucene(Directory directory, EntityDefinition def) {
+ public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer queryAnalyzer) {
this.directory = directory ;
this.docDef = def ;
@@ -107,6 +109,7 @@ public class TextIndexLucene implements TextIndex {
}
this.analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(VER), analyzerPerField) ;
+ this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : analyzer ;
openIndexWriter();
}
@@ -133,6 +136,10 @@ public class TextIndexLucene implements TextIndex {
return analyzer ;
}
+ public Analyzer getQueryAnalyzer() {
+ return queryAnalyzer ;
+ }
+
public IndexWriter getIndexWriter() {
return indexWriter;
}
@@ -239,7 +246,7 @@ public class TextIndexLucene implements TextIndex {
private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
String escaped = QueryParserBase.escape(uri) ;
String qs = docDef.getEntityField() + ":" + escaped ;
- Query query = parseQuery(qs, docDef.getPrimaryField(), analyzer) ;
+ Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs ;
List<Map<String, Node>> records = new ArrayList<Map<String, Node>>() ;
@@ -286,7 +293,7 @@ public class TextIndexLucene implements TextIndex {
private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
- Query query = parseQuery(qs, docDef.getPrimaryField(), analyzer) ;
+ Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
if ( limit <= 0 )
limit = MAX_N ;
ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
index c343824..7fb30cb 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
@@ -18,10 +18,12 @@
package org.apache.jena.query.text.assembler;
-import static org.apache.jena.query.text.assembler.TextVocab.pDataset ;
-import static org.apache.jena.query.text.assembler.TextVocab.pIndex ;
-import static org.apache.jena.query.text.assembler.TextVocab.textDataset ;
+import java.lang.reflect.Constructor ;
+
+import org.apache.jena.atlas.logging.Log ;
import org.apache.jena.query.text.TextDatasetFactory ;
+import org.apache.jena.query.text.TextDocProducer ;
+import org.apache.jena.query.text.TextDocProducerTriples ;
import org.apache.jena.query.text.TextIndex ;
import com.hp.hpl.jena.assembler.Assembler ;
@@ -29,9 +31,13 @@ import com.hp.hpl.jena.assembler.Mode ;
import com.hp.hpl.jena.assembler.assemblers.AssemblerBase ;
import com.hp.hpl.jena.query.Dataset ;
import com.hp.hpl.jena.rdf.model.Resource ;
+import com.hp.hpl.jena.sparql.ARQConstants ;
import com.hp.hpl.jena.sparql.core.assembler.DatasetAssembler ;
+import com.hp.hpl.jena.sparql.util.Loader ;
import com.hp.hpl.jena.sparql.util.graph.GraphUtils ;
+import static org.apache.jena.query.text.assembler.TextVocab.* ;
+
public class TextDatasetAssembler extends AssemblerBase implements Assembler
{
private DatasetAssembler datasetAssembler = new DatasetAssembler() ;
@@ -51,11 +57,28 @@ public class TextDatasetAssembler extends AssemblerBase implements Assembler
{
Resource dataset = GraphUtils.getResourceValue(root, pDataset) ;
Resource index = GraphUtils.getResourceValue(root, pIndex) ;
+ Resource textDocProducerNode = GraphUtils.getResourceValue(root, pTextDocProducer) ;
Dataset ds = (Dataset)a.open(dataset) ;
TextIndex textIndex = (TextIndex)a.open(index) ;
+ // Null will use the default producer
+ TextDocProducer textDocProducer = null ;
+ if (null != textDocProducerNode) {
+ Class<?> c = Loader.loadClass(textDocProducerNode.getURI(), TextDocProducer.class) ;
+ try
+ {
+ Constructor<?> ctor = c.getConstructor(TextIndex.class) ;
+ textDocProducer = (TextDocProducer)ctor.newInstance(textIndex) ;
+ }
+ catch (Exception ex)
+ {
+ String className = textDocProducerNode.getURI().substring(ARQConstants.javaClassURIScheme.length()) ;
+ Log.warn(Loader.class, "Exception during instantiation '"+className+"': "+ex.getMessage()) ;
+ return null ;
+ }
+ }
- Dataset dst = TextDatasetFactory.create(ds, textIndex, true) ;
+ Dataset dst = TextDatasetFactory.create(ds, textIndex, true, textDocProducer) ;
return dst ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index bbfb40a..670c530 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -20,6 +20,7 @@ package org.apache.jena.query.text.assembler ;
import static org.apache.jena.query.text.assembler.TextVocab.pDirectory ;
import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap ;
+import static org.apache.jena.query.text.assembler.TextVocab.pQueryAnalyzer ;
import java.io.File ;
import java.io.IOException ;
@@ -30,6 +31,7 @@ import org.apache.jena.query.text.TextDatasetFactory ;
import org.apache.jena.query.text.TextIndex ;
import org.apache.jena.query.text.TextIndexException ;
import org.apache.jena.riot.system.IRILib ;
+import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.store.Directory ;
import org.apache.lucene.store.FSDirectory ;
import org.apache.lucene.store.RAMDirectory ;
@@ -39,6 +41,7 @@ import com.hp.hpl.jena.assembler.Mode ;
import com.hp.hpl.jena.assembler.assemblers.AssemblerBase ;
import com.hp.hpl.jena.rdf.model.RDFNode ;
import com.hp.hpl.jena.rdf.model.Resource ;
+import com.hp.hpl.jena.rdf.model.Statement ;
import com.hp.hpl.jena.sparql.util.graph.GraphUtils ;
public class TextIndexLuceneAssembler extends AssemblerBase {
@@ -75,11 +78,22 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
File dir = new File(path) ;
directory = FSDirectory.open(dir) ;
}
+
+ Analyzer queryAnalyzer = null;
+ Statement queryAnalyzerStatement = root.getProperty(pQueryAnalyzer);
+ if (null != queryAnalyzerStatement) {
+ RDFNode qaNode = queryAnalyzerStatement.getObject();
+ if (! qaNode.isResource()) {
+ throw new TextIndexException("Text query analyzer property is not a resource : " + qaNode);
+ }
+ Resource analyzerResource = (Resource) qaNode;
+ queryAnalyzer = (Analyzer) a.open(analyzerResource);
+ }
Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
EntityDefinition docDef = (EntityDefinition)a.open(r) ;
- return TextDatasetFactory.createLuceneIndex(directory, docDef) ;
+ return TextDatasetFactory.createLuceneIndex(directory, docDef, queryAnalyzer) ;
} catch (IOException e) {
IO.exception(e) ;
return null ;
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 62acaab..78da358 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -31,12 +31,14 @@ public class TextVocab
public static final Resource textDataset = Vocab.resource(NS, "TextDataset") ;
public static final Property pDataset = Vocab.property(NS, "dataset") ;
public static final Property pIndex = Vocab.property(NS, "index") ;
+ public static final Property pTextDocProducer = Vocab.property(NS, "textDocProducer") ;
public static final Resource textIndex = Vocab.resource(NS, "TextIndex") ;
public static final Resource textIndexSolr = Vocab.resource(NS, "TextIndexSolr") ;
public static final Resource textIndexLucene = Vocab.resource(NS, "TextIndexLucene") ;
public static final Property pServer = Vocab.property(NS, "server") ; // Solr
public static final Property pDirectory = Vocab.property(NS, "directory") ; // Lucene
+ public static final Property pQueryAnalyzer = Vocab.property(NS, "queryAnalyzer") ;
public static final Property pEntityMap = Vocab.property(NS, "entityMap") ;
// Entity definition
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
index 7d78fba..7f83b90 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
@@ -39,7 +39,7 @@ public class AbstractTestDatasetWithLuceneGraphTextIndex extends AbstractTestDat
Directory dir = new RAMDirectory() ;
EntityDefinition eDef = new EntityDefinition("iri", "text", "graph", RDFS.label.asNode()) ;
eDef.set("comment", RDFS.comment.asNode()) ; // some tests require indexing rdfs:comment
- TextIndex tidx = new TextIndexLucene(dir, eDef) ;
+ TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
dataset = TextDatasetFactory.create(ds1, tidx) ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
index daba00c..e045477 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
@@ -117,7 +117,7 @@ public class TestBuildTextDataset extends BaseTest
Directory dir = new RAMDirectory() ;
// Join together into a dataset
- Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef) ;
+ Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
return ds ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
index b60110d..b14526e 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
@@ -63,7 +63,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testReadInMiddleOfWrite() throws InterruptedException, ExecutionException
{
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
final Dataset ds = DatasetFactory.create(dsg);
final ExecutorService execService = Executors.newSingleThreadExecutor();
final Future<?> f = execService.submit(new Runnable()
@@ -119,7 +119,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testWriteInMiddleOfRead() throws InterruptedException, ExecutionException
{
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
final int numReads = 10;
final Dataset ds = DatasetFactory.create(dsg);
final ExecutorService execService = Executors.newFixedThreadPool(10);
@@ -187,7 +187,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testIsolation() throws InterruptedException, ExecutionException {
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), entDef);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), entDef, null);
final int numReaders = 2;
final List<Future<?>> futures = new ArrayList<Future<?>>(numReaders);
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
index 7f1d13e..a80d399 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
@@ -40,7 +40,7 @@ public class TestTextTDB extends BaseTest
Dataset ds1 = TDBFactory.createDataset() ;
Directory dir = new RAMDirectory() ;
EntityDefinition eDef = new EntityDefinition("iri", "text", RDFS.label) ;
- TextIndex tidx = new TextIndexLucene(dir, eDef) ;
+ TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
Dataset ds = TextDatasetFactory.create(ds1, tidx) ;
return ds ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
index 49d029e..2fde909 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
@@ -41,6 +41,8 @@ public abstract class AbstractTestTextAssembler {
protected static final Resource SIMPLE_INDEX_SPEC2;
protected static final Resource SIMPLE_INDEX_SPEC3;
protected static final Resource SIMPLE_INDEX_SPEC4;
+ protected static final Resource SIMPLE_INDEX_SPEC5;
+ protected static final Resource SIMPLE_INDEX_SPEC_QUERY_ANALYZER;
protected static final Resource SIMPLE_ENTITY_MAP_SPEC;
protected static final Resource SIMPLE_INDEX_SPEC_LITERAL_DIR;
protected static final Resource SIMPLE_INDEX_SPEC_MEM_DIR;
@@ -76,17 +78,32 @@ public abstract class AbstractTestTextAssembler {
.addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec2"))
.addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
- SIMPLE_INDEX_SPEC4 =
+ SIMPLE_INDEX_SPEC3 =
model.createResource(TESTBASE + "simpleIndexSpec3")
.addProperty(RDF.type, TextVocab.textIndexLucene)
.addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec3"))
.addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
- SIMPLE_INDEX_SPEC3 =
+ SIMPLE_INDEX_SPEC4 =
model.createResource(TESTBASE + "simpleIndexSpec4")
.addProperty(RDF.type, TextVocab.textIndexLucene)
.addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec4"))
.addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
+
+ SIMPLE_INDEX_SPEC5 =
+ model.createResource(TESTBASE + "simpleIndexSpec5")
+ .addProperty(RDF.type, TextVocab.textIndexLucene)
+ .addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec5"))
+ .addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
+
+ SIMPLE_INDEX_SPEC_QUERY_ANALYZER =
+ model.createResource(TESTBASE + "simpleIndexSpecQueryAnalyzer")
+ .addProperty(RDF.type, TextVocab.textIndexLucene)
+ .addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpecQueryAnalyzer"))
+ .addProperty(TextVocab.pQueryAnalyzer,
+ model.createResource().addProperty(RDF.type, TextVocab.keywordAnalyzer)
+ )
+ .addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
SIMPLE_INDEX_SPEC_LITERAL_DIR =
model.createResource(TESTBASE + "simpleIndexLiteralDirSpec")
@@ -108,6 +125,8 @@ public abstract class AbstractTestTextAssembler {
indexDir = new File("target/test/testasm/simpleIndexSpec2"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
indexDir = new File("target/test/testasm/simpleIndexSpec3"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
indexDir = new File("target/test/testasm/simpleIndexSpec4"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+ indexDir = new File("target/test/testasm/simpleIndexSpec5"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+ indexDir = new File("target/test/testasm/simpleIndexSpecQueryAnalyzer"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
indexDir = new File("target/test/testasm/simpleIndexLiteralDir"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
index cdc9f54..c0bdb82 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
@@ -18,18 +18,23 @@
package org.apache.jena.query.text.assembler;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import org.apache.jena.query.text.DatasetGraphText ;
+import org.apache.jena.query.text.TextDocProducer ;
+import org.apache.jena.query.text.TextIndex ;
+import org.apache.jena.query.text.TextIndexLucene ;
+import org.apache.jena.query.text.TextQuery ;
+import org.junit.Test ;
-import org.apache.jena.query.text.TextIndexLucene;
-import org.apache.jena.query.text.TextQuery;
-import org.junit.Test;
+import com.hp.hpl.jena.assembler.Assembler ;
+import com.hp.hpl.jena.assembler.exceptions.AssemblerException ;
+import com.hp.hpl.jena.graph.Node ;
+import com.hp.hpl.jena.query.Dataset ;
+import com.hp.hpl.jena.rdf.model.Resource ;
+import com.hp.hpl.jena.sparql.core.QuadAction ;
+import com.hp.hpl.jena.tdb.assembler.AssemblerTDB ;
+import com.hp.hpl.jena.vocabulary.RDF ;
-import com.hp.hpl.jena.assembler.Assembler;
-import com.hp.hpl.jena.query.Dataset;
-import com.hp.hpl.jena.rdf.model.Resource;
-import com.hp.hpl.jena.tdb.assembler.AssemblerTDB;
-import com.hp.hpl.jena.vocabulary.RDF;
+import static org.junit.Assert.* ;
/**
* Test the text dataset assembler.
@@ -41,24 +46,29 @@ public class TestTextDatasetAssembler extends AbstractTestTextAssembler {
private static final Resource spec1;
private static final Resource noDatasetPropertySpec;
private static final Resource noIndexPropertySpec;
+ private static final Resource customTextDocProducerSpec;
- @Test public void testSimpleDatasetAssembler() {
+ @Test
+ public void testSimpleDatasetAssembler() { // Assembles spec1 and checks that the dataset context holds a Lucene text index.
Dataset dataset = (Dataset) Assembler.general.open(spec1);
assertTrue(dataset.getContext().get(TextQuery.textIndex) instanceof TextIndexLucene);
}
- @Test public void testErrorOnNoDataset() {
- try {
- Assembler.general.open(noDatasetPropertySpec);
- fail("should have thrown an exception");
- } catch (Exception e) {}
+ @Test(expected = AssemblerException.class) // Passes only if opening the spec raises AssemblerException.
+ public void testErrorOnNoDataset() {
+ Assembler.general.open(noDatasetPropertySpec); // NOTE(review): presumably a spec with no dataset property - its definition is not visible here, confirm.
}
- @Test public void testErrorOnNoIndex() {
- try {
- Assembler.general.open(noIndexPropertySpec);
- fail("should have thrown an exception");
- } catch (Exception e) {}
+ @Test(expected = AssemblerException.class) // Passes only if opening the spec raises AssemblerException.
+ public void testErrorOnNoIndex() {
+ Assembler.general.open(noIndexPropertySpec); // This spec is built with a pDataset property but no pIndex property.
+ }
+
+ @Test
+ public void testCustomTextDocProducer() { // Checks that the producer class named in the spec is the assembled dataset graph's monitor.
+ Dataset dataset = (Dataset)Assembler.general.open(customTextDocProducerSpec) ;
+ DatasetGraphText dsgText = (DatasetGraphText)dataset.asDatasetGraph() ; // The cast itself fails the test if the assembled graph is not a DatasetGraphText.
+ assertTrue(dsgText.getMonitor() instanceof CustomTextDocProducer) ;
}
static {
@@ -77,7 +87,27 @@ public class TestTextDatasetAssembler extends AbstractTestTextAssembler {
model.createResource(TESTBASE + "noIndexPropertySpec")
.addProperty(RDF.type, TextVocab.textDataset)
.addProperty(TextVocab.pDataset, SIMPLE_DATASET_SPEC);
+ customTextDocProducerSpec =
+ model.createResource(TESTBASE + "customTextDocProducerSpec")
+ .addProperty(RDF.type, TextVocab.textDataset)
+ .addProperty(TextVocab.pDataset, SIMPLE_DATASET_SPEC)
+ .addProperty(TextVocab.pIndex, SIMPLE_INDEX_SPEC5)
+ .addProperty(TextVocab.pTextDocProducer, model.createResource("java:org.apache.jena.query.text.assembler.TestTextDatasetAssembler$CustomTextDocProducer"));
}
+
+ private static class CustomTextDocProducer implements TextDocProducer { // Do-nothing producer; customTextDocProducerSpec refers to it by its binary class name.
+
+ public CustomTextDocProducer(TextIndex textIndex) { } // The index argument is ignored. NOTE(review): presumably the constructor shape the assembler looks up - confirm against TextDatasetAssembler.
+
+ @Override
+ public void start() { } // Intentionally empty.
+
+ @Override
+ public void finish() { } // Intentionally empty.
+
+ @Override
+ public void change(QuadAction qaction, Node g, Node s, Node p, Node o) { } // Quad changes are ignored; nothing is indexed.
+ }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
index ffaf202..9a0d8ac 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
@@ -19,6 +19,7 @@
package org.apache.jena.query.text.assembler;
import org.apache.jena.query.text.TextIndexLucene ;
+import org.apache.lucene.analysis.core.KeywordAnalyzer ;
import org.apache.lucene.store.RAMDirectory ;
import org.junit.Test ;
@@ -66,6 +67,7 @@ public class TestTextIndexLuceneAssembler extends AbstractTestTextAssembler {
TextIndexLucene index = (TextIndexLucene) assembler.open(a, root, /*mode*/ null);
try {
assertFalse(index.getDirectory() instanceof RAMDirectory);
+ assertNotNull(index.getQueryAnalyzer());
}
finally {
index.close();
@@ -87,6 +89,16 @@ public class TestTextIndexLuceneAssembler extends AbstractTestTextAssembler {
index.close();
}
}
+
+ @Test public void testQueryAnalyzer() { // Verifies that the index assembled from the query-analyzer spec exposes a KeywordAnalyzer as its query analyzer.
+ TextIndexLucene index = (TextIndexLucene) Assembler.general.open(SIMPLE_INDEX_SPEC_QUERY_ANALYZER); // NOTE(review): presumably the spec declaring TextVocab.keywordAnalyzer under pQueryAnalyzer - confirm in AbstractTestTextAssembler.
+ try {
+ assertTrue(index.getQueryAnalyzer() instanceof KeywordAnalyzer);
+ }
+ finally {
+ index.close(); // Runs even when the assertion fails, so the index is always closed.
+ }
+ }
static {
TextAssembler.init();