You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by sa...@apache.org on 2015/02/25 10:33:37 UTC

jena git commit: Add option to specify an analyzer for the query string, and add mechanism for defining custom TextDocProducers

Repository: jena
Updated Branches:
  refs/heads/master 59cb7f3b6 -> f52f75014


Add option to specify an analyzer for the query string, and add mechanism for defining custom TextDocProducers


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/f52f7501
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/f52f7501
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/f52f7501

Branch: refs/heads/master
Commit: f52f7501482f99312332e931aa7313957019b75a
Parents: 59cb7f3
Author: Stephen Allen <sa...@apache.org>
Authored: Wed Feb 25 04:32:55 2015 -0500
Committer: Stephen Allen <sa...@apache.org>
Committed: Wed Feb 25 04:32:55 2015 -0500

----------------------------------------------------------------------
 jena-text/ReleaseNotes.txt                      |  2 +
 .../main/java/examples/JenaTextExample1.java    |  2 +-
 jena-text/src/main/java/jena/textindexdump.java |  2 +-
 .../jena/query/text/TextDatasetFactory.java     | 54 ++++++++++++---
 .../apache/jena/query/text/TextDocProducer.java |  6 ++
 .../query/text/TextDocProducerEntities.java     |  4 +-
 .../jena/query/text/TextDocProducerTriples.java |  4 +-
 .../apache/jena/query/text/TextIndexLucene.java | 13 +++-
 .../text/assembler/TextDatasetAssembler.java    | 31 +++++++--
 .../assembler/TextIndexLuceneAssembler.java     | 16 ++++-
 .../jena/query/text/assembler/TextVocab.java    |  2 +
 ...ractTestDatasetWithLuceneGraphTextIndex.java |  2 +-
 .../jena/query/text/TestBuildTextDataset.java   |  2 +-
 .../text/TestLuceneWithMultipleThreads.java     |  6 +-
 .../org/apache/jena/query/text/TestTextTDB.java |  2 +-
 .../assembler/AbstractTestTextAssembler.java    | 23 ++++++-
 .../assembler/TestTextDatasetAssembler.java     | 72 ++++++++++++++------
 .../assembler/TestTextIndexLuceneAssembler.java | 12 ++++
 18 files changed, 202 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/ReleaseNotes.txt
----------------------------------------------------------------------
diff --git a/jena-text/ReleaseNotes.txt b/jena-text/ReleaseNotes.txt
index 5645f55..bd27f64 100644
--- a/jena-text/ReleaseNotes.txt
+++ b/jena-text/ReleaseNotes.txt
@@ -4,5 +4,7 @@ ChangeLog for jena-text
 ==== jena-text 1.1.2
 
 + JENA-848 : Fix concurrency control for in-process Lucene index
++ Add option to specify an analyzer for the query string
++ Add mechanism for defining custom TextDocProducers
 
 

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/examples/JenaTextExample1.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/examples/JenaTextExample1.java b/jena-text/src/main/java/examples/JenaTextExample1.java
index e11d8be..631a096 100644
--- a/jena-text/src/main/java/examples/JenaTextExample1.java
+++ b/jena-text/src/main/java/examples/JenaTextExample1.java
@@ -65,7 +65,7 @@ public class JenaTextExample1
         Directory dir =  new RAMDirectory();
         
         // Join together into a dataset
-        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef) ;
+        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
         
         return ds ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/jena/textindexdump.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/jena/textindexdump.java b/jena-text/src/main/java/jena/textindexdump.java
index 773edca..6aceb16 100644
--- a/jena-text/src/main/java/jena/textindexdump.java
+++ b/jena-text/src/main/java/jena/textindexdump.java
@@ -100,7 +100,7 @@ public class textindexdump extends CmdARQ {
     private static void dump(TextIndexLucene textIndex) {
         try {
             Directory directory = textIndex.getDirectory() ;
-            Analyzer analyzer = textIndex.getAnalyzer() ;
+            Analyzer analyzer = textIndex.getQueryAnalyzer() ;
             IndexReader indexReader = DirectoryReader.open(directory) ;
             IndexSearcher indexSearcher = new IndexSearcher(indexReader);
             QueryParser queryParser = new QueryParser(TextIndexLucene.VER, textIndex.getDocDef().getPrimaryField(), analyzer);

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index f6ab21b..c7e4208 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -19,6 +19,7 @@
 package org.apache.jena.query.text;
 
 import org.apache.jena.query.text.assembler.TextVocab ;
+import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.store.Directory ;
 import org.apache.solr.client.solrj.SolrServer ;
 
@@ -51,6 +52,14 @@ public class TextDatasetFactory
         dsg = create(dsg, textIndex, closeIndexOnDSGClose) ;
         return DatasetFactory.create(dsg) ;
     }
+    
+    /** Create a text-indexed dataset using the given TextDocProducer (null selects the default producer), optionally allowing the text index to be closed if the Dataset is */
+    public static Dataset create(Dataset base, TextIndex textIndex, boolean closeIndexOnDSGClose, TextDocProducer producer)
+    {
+        DatasetGraph dsg = base.asDatasetGraph() ;
+        dsg = create(dsg, textIndex, closeIndexOnDSGClose, producer) ;
+        return DatasetFactory.create(dsg) ;
+    }
 
 
     /** Create a text-indexed DatasetGraph */ 
@@ -62,7 +71,12 @@ public class TextDatasetFactory
     /** Create a text-indexed DatasetGraph, optionally allowing the text index to be closed if the DatasetGraph is */
     public static DatasetGraph create(DatasetGraph dsg, TextIndex textIndex, boolean closeIndexOnDSGClose)
     {
-        TextDocProducer producer = new TextDocProducerTriples(textIndex.getDocDef(), textIndex) ;
+        return create(dsg, textIndex, closeIndexOnDSGClose, null);
+    }
+    
+    /** Create a text-indexed DatasetGraph using the given TextDocProducer (null selects the default TextDocProducerTriples), optionally allowing the text index to be closed if the DatasetGraph is */
+    public static DatasetGraph create(DatasetGraph dsg, TextIndex textIndex, boolean closeIndexOnDSGClose, TextDocProducer producer) {
+        if (producer == null) producer = new TextDocProducerTriples(textIndex) ;
         DatasetGraph dsgt = new DatasetGraphText(dsg, textIndex, producer, closeIndexOnDSGClose) ;
         // Also set on dsg
         Context c = dsgt.getContext() ;
@@ -71,24 +85,44 @@ public class TextDatasetFactory
         return dsgt ;
     }
     
-    /** Create a Lucene TextIndex */ 
-    public static TextIndex createLuceneIndex(Directory directory, EntityDefinition entMap)
+    /**
+     * Create a Lucene TextIndex
+     * 
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     */ 
+    public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
     {
-        TextIndex index = new TextIndexLucene(directory, entMap) ;
+        TextIndex index = new TextIndexLucene(directory, def, queryAnalyzer) ;
         return index ; 
     }
 
-    /** Create a text-indexed dataset, using Lucene */ 
-    public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition entMap)
+    /** 
+     * Create a text-indexed dataset, using Lucene
+     * 
+     * @param base the base Dataset
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     */ 
+    public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
     {
-        TextIndex index = createLuceneIndex(directory, entMap) ;
+        TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
         return create(base, index, true) ; 
     }
 
-    /** Create a text-indexed dataset, using Lucene */ 
-    public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition entMap)
+    /**
+     * Create a text-indexed DatasetGraph, using Lucene
+     * 
+     * @param base the base DatasetGraph
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     */ 
+    public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
     {
-        TextIndex index = createLuceneIndex(directory, entMap) ;
+        TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
         return create(base, index, true) ; 
     }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
index 8e74af7..afb522f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java
@@ -20,6 +20,12 @@ package org.apache.jena.query.text;
 
 import com.hp.hpl.jena.sparql.core.DatasetChanges ;
 
+/**
+ * Responsible for monitoring changes on a dataset and updating the TextIndex appropriately.
+ * <p>
+ * <strong>Note:</strong> If an implementation is to be instantiated dynamically via an Assembler, it must
+ * declare a public constructor that accepts a single parameter of type {@link org.apache.jena.query.text.TextIndex}.
+ */
 public interface TextDocProducer extends DatasetChanges
 {
 

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
index 9e2de8d..4427de6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java
@@ -40,8 +40,8 @@ public class TextDocProducerEntities extends DatasetChangesBatched implements Te
     // therefore whether or not we have to do autocommit
     private final ThreadLocal<Boolean> inTransaction = new ThreadLocal<Boolean>() ;
 
-    public TextDocProducerEntities(EntityDefinition defn, TextIndex indexer) {
-        this.defn = defn ;
+    public TextDocProducerEntities(TextIndex indexer) {
+        this.defn = indexer.getDocDef() ;
         this.indexer = indexer ;
         inTransaction.set(false) ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
index 3700eb0..30da166 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java
@@ -34,8 +34,8 @@ public class TextDocProducerTriples implements TextDocProducer {
     private final ThreadLocal<Boolean> inTransaction = new ThreadLocal<Boolean>() ;
     
 
-    public TextDocProducerTriples(EntityDefinition defn, TextIndex indexer) {
-        this.defn = defn ;
+    public TextDocProducerTriples(TextIndex indexer) {
+        this.defn = indexer.getDocDef() ;
         this.indexer = indexer ;
         inTransaction.set(false) ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 36d4050..12ff1ea 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -75,6 +75,7 @@ public class TextIndexLucene implements TextIndex {
     private final EntityDefinition docDef ;
     private final Directory        directory ;
     private final Analyzer         analyzer ;
+    private final Analyzer         queryAnalyzer ;
     
     // The IndexWriter can't be final because we may have to recreate it if rollback() is called.
     // However, it needs to be volatile in case the next write transaction is on a different thread,
@@ -87,8 +88,9 @@ public class TextIndexLucene implements TextIndex {
      * 
      * @param directory The Lucene Directory for the index
      * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
      */
-    public TextIndexLucene(Directory directory, EntityDefinition def) {
+    public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer queryAnalyzer) {
         this.directory = directory ;
         this.docDef = def ;
 
@@ -107,6 +109,7 @@ public class TextIndexLucene implements TextIndex {
         }
         
         this.analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(VER), analyzerPerField) ;
+        this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : analyzer ;
 
         openIndexWriter();
     }
@@ -133,6 +136,10 @@ public class TextIndexLucene implements TextIndex {
         return analyzer ;
     }
     
+    public Analyzer getQueryAnalyzer() {
+        return queryAnalyzer ;
+    }
+    
     public IndexWriter getIndexWriter() {
         return indexWriter;
     }
@@ -239,7 +246,7 @@ public class TextIndexLucene implements TextIndex {
     private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
         String escaped = QueryParserBase.escape(uri) ;
         String qs = docDef.getEntityField() + ":" + escaped ;
-        Query query = parseQuery(qs, docDef.getPrimaryField(), analyzer) ;
+        Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
         ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs ;
         List<Map<String, Node>> records = new ArrayList<Map<String, Node>>() ;
@@ -286,7 +293,7 @@ public class TextIndexLucene implements TextIndex {
 
     private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
-        Query query = parseQuery(qs, docDef.getPrimaryField(), analyzer) ;
+        Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
         if ( limit <= 0 )
             limit = MAX_N ;
         ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
index c343824..7fb30cb 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java
@@ -18,10 +18,12 @@
 
 package org.apache.jena.query.text.assembler;
 
-import static org.apache.jena.query.text.assembler.TextVocab.pDataset ;
-import static org.apache.jena.query.text.assembler.TextVocab.pIndex ;
-import static org.apache.jena.query.text.assembler.TextVocab.textDataset ;
+import java.lang.reflect.Constructor ;
+
+import org.apache.jena.atlas.logging.Log ;
 import org.apache.jena.query.text.TextDatasetFactory ;
+import org.apache.jena.query.text.TextDocProducer ;
+import org.apache.jena.query.text.TextDocProducerTriples ;
 import org.apache.jena.query.text.TextIndex ;
 
 import com.hp.hpl.jena.assembler.Assembler ;
@@ -29,9 +31,13 @@ import com.hp.hpl.jena.assembler.Mode ;
 import com.hp.hpl.jena.assembler.assemblers.AssemblerBase ;
 import com.hp.hpl.jena.query.Dataset ;
 import com.hp.hpl.jena.rdf.model.Resource ;
+import com.hp.hpl.jena.sparql.ARQConstants ;
 import com.hp.hpl.jena.sparql.core.assembler.DatasetAssembler ;
+import com.hp.hpl.jena.sparql.util.Loader ;
 import com.hp.hpl.jena.sparql.util.graph.GraphUtils ;
 
+import static org.apache.jena.query.text.assembler.TextVocab.* ;
+
 public class TextDatasetAssembler extends AssemblerBase implements Assembler
 {
     private DatasetAssembler datasetAssembler = new DatasetAssembler() ;
@@ -51,11 +57,28 @@ public class TextDatasetAssembler extends AssemblerBase implements Assembler
     {
         Resource dataset = GraphUtils.getResourceValue(root, pDataset) ;
         Resource index   = GraphUtils.getResourceValue(root, pIndex) ;
+        Resource textDocProducerNode = GraphUtils.getResourceValue(root, pTextDocProducer) ;
         
         Dataset ds = (Dataset)a.open(dataset) ;
         TextIndex textIndex = (TextIndex)a.open(index) ;
+        // Null will use the default producer
+        TextDocProducer textDocProducer = null ;
+        if (null != textDocProducerNode) {
+            Class<?> c = Loader.loadClass(textDocProducerNode.getURI(), TextDocProducer.class) ;
+            try
+            {
+                Constructor<?> ctor = c.getConstructor(TextIndex.class) ;
+                textDocProducer = (TextDocProducer)ctor.newInstance(textIndex) ;
+            }
+            catch (Exception ex)
+            {
+                String className = textDocProducerNode.getURI().substring(ARQConstants.javaClassURIScheme.length()) ;
+                Log.warn(Loader.class, "Exception during instantiation '"+className+"': "+ex.getMessage()) ;
+                return null ;
+            }
+        }
         
-        Dataset dst = TextDatasetFactory.create(ds, textIndex, true) ;
+        Dataset dst = TextDatasetFactory.create(ds, textIndex, true, textDocProducer) ;
         return dst ;
         
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index bbfb40a..670c530 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -20,6 +20,7 @@ package org.apache.jena.query.text.assembler ;
 
 import static org.apache.jena.query.text.assembler.TextVocab.pDirectory ;
 import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap ;
+import static org.apache.jena.query.text.assembler.TextVocab.pQueryAnalyzer ;
 
 import java.io.File ;
 import java.io.IOException ;
@@ -30,6 +31,7 @@ import org.apache.jena.query.text.TextDatasetFactory ;
 import org.apache.jena.query.text.TextIndex ;
 import org.apache.jena.query.text.TextIndexException ;
 import org.apache.jena.riot.system.IRILib ;
+import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.store.Directory ;
 import org.apache.lucene.store.FSDirectory ;
 import org.apache.lucene.store.RAMDirectory ;
@@ -39,6 +41,7 @@ import com.hp.hpl.jena.assembler.Mode ;
 import com.hp.hpl.jena.assembler.assemblers.AssemblerBase ;
 import com.hp.hpl.jena.rdf.model.RDFNode ;
 import com.hp.hpl.jena.rdf.model.Resource ;
+import com.hp.hpl.jena.rdf.model.Statement ;
 import com.hp.hpl.jena.sparql.util.graph.GraphUtils ;
 
 public class TextIndexLuceneAssembler extends AssemblerBase {
@@ -75,11 +78,22 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
                 File dir = new File(path) ;
                 directory = FSDirectory.open(dir) ;
             }
+            
+            Analyzer queryAnalyzer = null;
+            Statement queryAnalyzerStatement = root.getProperty(pQueryAnalyzer);
+            if (null != queryAnalyzerStatement) {
+                RDFNode qaNode = queryAnalyzerStatement.getObject();
+                if (! qaNode.isResource()) {
+                    throw new TextIndexException("Text query analyzer property is not a resource : " + qaNode);
+                }
+                Resource analyzerResource = (Resource) qaNode;
+                queryAnalyzer = (Analyzer) a.open(analyzerResource);
+            }
 
             Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
             EntityDefinition docDef = (EntityDefinition)a.open(r) ;
 
-            return TextDatasetFactory.createLuceneIndex(directory, docDef) ;
+            return TextDatasetFactory.createLuceneIndex(directory, docDef, queryAnalyzer) ;
         } catch (IOException e) {
             IO.exception(e) ;
             return null ;

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 62acaab..78da358 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -31,12 +31,14 @@ public class TextVocab
     public static final Resource textDataset        = Vocab.resource(NS, "TextDataset") ;
     public static final Property pDataset           = Vocab.property(NS, "dataset") ;
     public static final Property pIndex             = Vocab.property(NS, "index") ;
+    public static final Property pTextDocProducer   = Vocab.property(NS, "textDocProducer") ;
     
     public static final Resource textIndex          = Vocab.resource(NS, "TextIndex") ;
     public static final Resource textIndexSolr      = Vocab.resource(NS, "TextIndexSolr") ;
     public static final Resource textIndexLucene    = Vocab.resource(NS, "TextIndexLucene") ;
     public static final Property pServer            = Vocab.property(NS, "server") ;            // Solr
     public static final Property pDirectory         = Vocab.property(NS, "directory") ;         // Lucene
+    public static final Property pQueryAnalyzer     = Vocab.property(NS, "queryAnalyzer") ;
     public static final Property pEntityMap         = Vocab.property(NS, "entityMap") ;
     
     // Entity definition

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
index 7d78fba..7f83b90 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
@@ -39,7 +39,7 @@ public class AbstractTestDatasetWithLuceneGraphTextIndex extends AbstractTestDat
         Directory dir = new RAMDirectory() ;
         EntityDefinition eDef = new EntityDefinition("iri", "text", "graph", RDFS.label.asNode()) ;
         eDef.set("comment", RDFS.comment.asNode()) ; // some tests require indexing rdfs:comment
-        TextIndex tidx = new TextIndexLucene(dir, eDef) ;
+        TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
         dataset = TextDatasetFactory.create(ds1, tidx) ;
 	}
     

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
index daba00c..e045477 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
@@ -117,7 +117,7 @@ public class TestBuildTextDataset extends BaseTest
         Directory dir = new RAMDirectory() ;
 
         // Join together into a dataset
-        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef) ;
+        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
 
         return ds ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
index b60110d..b14526e 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
@@ -63,7 +63,7 @@ public class TestLuceneWithMultipleThreads
     @Test
     public void testReadInMiddleOfWrite() throws InterruptedException, ExecutionException
     {
-        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef);
+        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
         final Dataset ds = DatasetFactory.create(dsg);
         final ExecutorService execService = Executors.newSingleThreadExecutor();
         final Future<?> f = execService.submit(new Runnable()
@@ -119,7 +119,7 @@ public class TestLuceneWithMultipleThreads
     @Test
     public void testWriteInMiddleOfRead() throws InterruptedException, ExecutionException
     {
-        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef);
+        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
         final int numReads = 10;
         final Dataset ds = DatasetFactory.create(dsg);
         final ExecutorService execService = Executors.newFixedThreadPool(10);
@@ -187,7 +187,7 @@ public class TestLuceneWithMultipleThreads
     @Test
     public void testIsolation() throws InterruptedException, ExecutionException {
         
-        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), entDef);
+        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), entDef, null);
         
         final int numReaders = 2;
         final List<Future<?>> futures = new ArrayList<Future<?>>(numReaders);

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
index 7f1d13e..a80d399 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
@@ -40,7 +40,7 @@ public class TestTextTDB extends BaseTest
         Dataset ds1 = TDBFactory.createDataset() ;
         Directory dir = new RAMDirectory() ;
         EntityDefinition eDef = new EntityDefinition("iri", "text", RDFS.label) ;
-        TextIndex tidx = new TextIndexLucene(dir, eDef) ;
+        TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
         Dataset ds = TextDatasetFactory.create(ds1, tidx) ;
         return ds ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
index 49d029e..2fde909 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java
@@ -41,6 +41,8 @@ public abstract class AbstractTestTextAssembler {
 	protected static final Resource SIMPLE_INDEX_SPEC2;
 	protected static final Resource SIMPLE_INDEX_SPEC3;
 	protected static final Resource SIMPLE_INDEX_SPEC4;
+	protected static final Resource SIMPLE_INDEX_SPEC5;
+	protected static final Resource SIMPLE_INDEX_SPEC_QUERY_ANALYZER;
 	protected static final Resource SIMPLE_ENTITY_MAP_SPEC;
 	protected static final Resource SIMPLE_INDEX_SPEC_LITERAL_DIR;
 	protected static final Resource SIMPLE_INDEX_SPEC_MEM_DIR;
@@ -76,17 +78,32 @@ public abstract class AbstractTestTextAssembler {
                      .addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec2"))
                      .addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
 		
-		SIMPLE_INDEX_SPEC4 =
+		SIMPLE_INDEX_SPEC3 =
                 model.createResource(TESTBASE + "simpleIndexSpec3")
                      .addProperty(RDF.type, TextVocab.textIndexLucene)
                      .addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec3"))
                      .addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
 		
-		SIMPLE_INDEX_SPEC3 =
+		SIMPLE_INDEX_SPEC4 =
                 model.createResource(TESTBASE + "simpleIndexSpec4")
                      .addProperty(RDF.type, TextVocab.textIndexLucene)
                      .addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec4"))
                      .addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
+		
+		SIMPLE_INDEX_SPEC5 =
+		        model.createResource(TESTBASE + "simpleIndexSpec5")
+		        .addProperty(RDF.type, TextVocab.textIndexLucene)
+		        .addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpec5"))
+		        .addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
+		
+		SIMPLE_INDEX_SPEC_QUERY_ANALYZER =
+                model.createResource(TESTBASE + "simpleIndexSpecQueryAnalyzer")
+                     .addProperty(RDF.type, TextVocab.textIndexLucene)
+                     .addProperty(TextVocab.pDirectory, model.createResource("file:target/test/testasm/simpleIndexSpecQueryAnalyzer"))
+                     .addProperty(TextVocab.pQueryAnalyzer,
+                         model.createResource().addProperty(RDF.type, TextVocab.keywordAnalyzer)
+                     )
+                     .addProperty(TextVocab.pEntityMap, SIMPLE_ENTITY_MAP_SPEC);
 
 		SIMPLE_INDEX_SPEC_LITERAL_DIR =
 				model.createResource(TESTBASE + "simpleIndexLiteralDirSpec")
@@ -108,6 +125,8 @@ public abstract class AbstractTestTextAssembler {
 	    indexDir = new File("target/test/testasm/simpleIndexSpec2"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
 	    indexDir = new File("target/test/testasm/simpleIndexSpec3"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
 	    indexDir = new File("target/test/testasm/simpleIndexSpec4"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+	    indexDir = new File("target/test/testasm/simpleIndexSpec5"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+	    indexDir = new File("target/test/testasm/simpleIndexSpecQueryAnalyzer"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
 	    indexDir = new File("target/test/testasm/simpleIndexLiteralDir"); if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
 	}
 	

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
index cdc9f54..c0bdb82 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
@@ -18,18 +18,23 @@
 
 package org.apache.jena.query.text.assembler;
 
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import org.apache.jena.query.text.DatasetGraphText ;
+import org.apache.jena.query.text.TextDocProducer ;
+import org.apache.jena.query.text.TextIndex ;
+import org.apache.jena.query.text.TextIndexLucene ;
+import org.apache.jena.query.text.TextQuery ;
+import org.junit.Test ;
 
-import org.apache.jena.query.text.TextIndexLucene;
-import org.apache.jena.query.text.TextQuery;
-import org.junit.Test;
+import com.hp.hpl.jena.assembler.Assembler ;
+import com.hp.hpl.jena.assembler.exceptions.AssemblerException ;
+import com.hp.hpl.jena.graph.Node ;
+import com.hp.hpl.jena.query.Dataset ;
+import com.hp.hpl.jena.rdf.model.Resource ;
+import com.hp.hpl.jena.sparql.core.QuadAction ;
+import com.hp.hpl.jena.tdb.assembler.AssemblerTDB ;
+import com.hp.hpl.jena.vocabulary.RDF ;
 
-import com.hp.hpl.jena.assembler.Assembler;
-import com.hp.hpl.jena.query.Dataset;
-import com.hp.hpl.jena.rdf.model.Resource;
-import com.hp.hpl.jena.tdb.assembler.AssemblerTDB;
-import com.hp.hpl.jena.vocabulary.RDF;
+import static org.junit.Assert.* ;
 
 /**
  * Test the text dataset assembler.
@@ -41,24 +46,29 @@ public class TestTextDatasetAssembler extends AbstractTestTextAssembler {
 	private static final Resource spec1;
 	private static final Resource noDatasetPropertySpec;
 	private static final Resource noIndexPropertySpec;
+	private static final Resource customTextDocProducerSpec;
 	
-	@Test public void testSimpleDatasetAssembler() {
+	@Test
+	public void testSimpleDatasetAssembler() {
 		Dataset dataset = (Dataset) Assembler.general.open(spec1);
 		assertTrue(dataset.getContext().get(TextQuery.textIndex) instanceof TextIndexLucene);
 	}
 	
-	@Test public void testErrorOnNoDataset() {
-		try {
-		    Assembler.general.open(noDatasetPropertySpec);
-		    fail("should have thrown an exception");
-		} catch (Exception e) {}
+	@Test(expected = AssemblerException.class)
+	public void testErrorOnNoDataset() {
+	    Assembler.general.open(noDatasetPropertySpec);
 	}
 	
-	@Test public void testErrorOnNoIndex() {
-		try {
-		    Assembler.general.open(noIndexPropertySpec);
-		    fail("should have thrown an exception");
-		} catch (Exception e) {}
+	@Test(expected = AssemblerException.class)
+	public void testErrorOnNoIndex() {
+	    Assembler.general.open(noIndexPropertySpec);
+	}
+	
+	@Test
+	public void testCustomTextDocProducer() {
+	    Dataset dataset = (Dataset)Assembler.general.open(customTextDocProducerSpec) ;
+	    DatasetGraphText dsgText = (DatasetGraphText)dataset.asDatasetGraph() ;
+        assertTrue(dsgText.getMonitor() instanceof CustomTextDocProducer) ;
 	}
 	
 	static {
@@ -77,7 +87,27 @@ public class TestTextDatasetAssembler extends AbstractTestTextAssembler {
 				model.createResource(TESTBASE + "noIndexPropertySpec")
 				     .addProperty(RDF.type, TextVocab.textDataset)
 				     .addProperty(TextVocab.pDataset, SIMPLE_DATASET_SPEC);
+		customTextDocProducerSpec =
+                model.createResource(TESTBASE + "customTextDocProducerSpec")
+                     .addProperty(RDF.type, TextVocab.textDataset)
+                     .addProperty(TextVocab.pDataset, SIMPLE_DATASET_SPEC)
+                     .addProperty(TextVocab.pIndex, SIMPLE_INDEX_SPEC5)
+                     .addProperty(TextVocab.pTextDocProducer, model.createResource("java:org.apache.jena.query.text.assembler.TestTextDatasetAssembler$CustomTextDocProducer"));
 		
 	}
+	
+	private static class CustomTextDocProducer implements TextDocProducer {
+	    
+	    public CustomTextDocProducer(TextIndex textIndex) { }
+
+        @Override
+        public void start() { }
+
+        @Override
+        public void finish() { }
+
+        @Override
+        public void change(QuadAction qaction, Node g, Node s, Node p, Node o) { }
+	}
 
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/f52f7501/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
index ffaf202..9a0d8ac 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
@@ -19,6 +19,7 @@
 package org.apache.jena.query.text.assembler;
 
 import org.apache.jena.query.text.TextIndexLucene ;
+import org.apache.lucene.analysis.core.KeywordAnalyzer ;
 import org.apache.lucene.store.RAMDirectory ;
 import org.junit.Test ;
 
@@ -66,6 +67,7 @@ public class TestTextIndexLuceneAssembler extends AbstractTestTextAssembler {
         TextIndexLucene index = (TextIndexLucene) assembler.open(a, root, /*mode*/ null);
         try {
             assertFalse(index.getDirectory() instanceof RAMDirectory);
+            assertNotNull(index.getQueryAnalyzer());
         }
         finally {
             index.close();
@@ -87,6 +89,16 @@ public class TestTextIndexLuceneAssembler extends AbstractTestTextAssembler {
             index.close();
         }
     }
+    
+    @Test public void testQueryAnalyzer() {
+        TextIndexLucene index = (TextIndexLucene) Assembler.general.open(SIMPLE_INDEX_SPEC_QUERY_ANALYZER);
+        try {
+            assertTrue(index.getQueryAnalyzer() instanceof KeywordAnalyzer);
+        }
+        finally {
+            index.close();
+        }
+    }
 
     static {
         TextAssembler.init();