You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:10 UTC

[01/18] jena git commit: initial commit for generic analyzers

Repository: jena
Updated Branches:
  refs/heads/master eb4b5b689 -> 31fe23868


initial commit for generic analyzers

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1440e81d
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1440e81d
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1440e81d

Branch: refs/heads/master
Commit: 1440e81d75ee01baf874c407d5f0017bc59c6787
Parents: e01d208
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Mon Apr 17 14:53:41 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Mon Apr 17 14:53:41 2017 -0500

----------------------------------------------------------------------
 .../assembler/GenericAnalyzerAssembler.java     | 20 ++++++++++++++++++++
 .../query/text/assembler/TextAssembler.java     |  1 +
 .../jena/query/text/assembler/TextVocab.java    |  1 +
 3 files changed, 22 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/1440e81d/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
new file mode 100644
index 0000000..5c25cb2
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -0,0 +1,20 @@
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.rdf.model.Resource;
+
+public class GenericAnalyzerAssembler extends AssemblerBase {
+
+	public GenericAnalyzerAssembler() {
+		// TODO Auto-generated constructor stub
+	}
+
+	@Override
+	public Object open(Assembler a, Resource root, Mode mode) {
+		// TODO Auto-generated method stub
+		return null;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1440e81d/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 80b2f7e..636c6bc 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -37,6 +37,7 @@ public class TextAssembler
         Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ;
 
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/1440e81d/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 719d404..bc49d10 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -66,6 +66,7 @@ public class TextVocab
     public static final Resource lowerCaseKeywordAnalyzer    = Vocab.resource(NS, "LowerCaseKeywordAnalyzer");
     public static final Resource localizedAnalyzer    = Vocab.resource(NS, "LocalizedAnalyzer");
     public static final Resource configurableAnalyzer = Vocab.resource(NS, "ConfigurableAnalyzer");
+    public static final Resource genericAnalyzer   = Vocab.resource(NS, "GenericAnalyzer");
     
     // Tokenizers
     public static final Resource standardTokenizer  = Vocab.resource(NS, "StandardTokenizer");


[10/18] jena git commit: represent parameter types as resources like text:TypeSet instead of literal string

Posted by an...@apache.org.
represent parameter types as resources like text:TypeSet instead of
literal string

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/311efab2
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/311efab2
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/311efab2

Branch: refs/heads/master
Commit: 311efab2fd26a58406b29b64d74b41039292d080
Parents: a3bb8e41
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 09:18:35 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 09:18:35 2017 -0500

----------------------------------------------------------------------
 .../assembler/GenericAnalyzerAssembler.java     | 208 ++++++++++---------
 .../jena/query/text/assembler/TextVocab.java    |   6 +
 .../assembler/TestGenericAnalyzerAssembler.java |  29 +--
 3 files changed, 125 insertions(+), 118 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 853fcb6..4f10b85 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -43,12 +43,12 @@ import org.apache.lucene.analysis.CharArraySet;
  * <p>
  * The parameters may be of the following types:
  * <pre>
- *     string    String
- *     set       org.apache.lucene.analysis.util.CharArraySet
- *     file      java.io.FileReader
- *     int       int
- *     boolean   boolean
- *     analyzer  org.apache.lucene.analysis.Analyzer
+ *     text:TypeString    String
+ *     text:TypeSet       org.apache.lucene.analysis.util.CharArraySet
+ *     text:TypeFile      java.io.FileReader
+ *     text:TypeInt       int
+ *     text:TypeBoolean   boolean
+ *     text:TypeAnalyzer  org.apache.lucene.analysis.Analyzer
  * </pre>
  * 
  * Although the list of types is not exhaustive it is a simple matter
@@ -74,15 +74,18 @@ import org.apache.lucene.analysis.CharArraySet;
  * <ul>
  * <li>an optional <code>text:paramName</code> that may be used to document which 
  * parameter is represented</li>
- * <li>a <code>text:paramType</code> which is one of: <code>string</code>, 
- * <code>set</code>, <code>file</code>, <code>int</code>, <code>boolean</code>.</li>
- * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.</li>
+ * <li>a <code>text:paramType</code> which is one of: <code>text:TypeString</code>, 
+ * <code>text:TypeSet</code>, <code>text:TypeFile</code>, <code>text:TypeInt</code>, 
+ * <code>text:TypeBoolean</code>, <code>text:TypeAnalyzer</code>.</li>
+ * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int or resource.</li>
  * </ul>
  * <p>
- * A parameter of type <code>set</code> <i>must have</i> a list of zero or more <code>String</code>s.
+ * A parameter of type <code>text:TypeSet</code> <i>must have</i> a list of zero or 
+ * more <code>String</code>s.
  * <p>
- * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
- * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
+ * A parameter of type <code>text:TypeString</code>, <code>text:TypeFile</code>, 
+ * <code>text:TypeBoolean</code>, <code>text:TypeInt</code> or <code>text:TypeAnalyzer</code> 
+ * <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
  * <p>
  * Examples:
  * <pre>
@@ -94,10 +97,10 @@ import org.apache.lucene.analysis.CharArraySet;
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                text:params (
                     [ text:paramName "stopwords" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("the" "a" "an") ]
                     [ text:paramName "stemExclusionSet" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("ing" "ed") ]
                     )
            ] .
@@ -111,10 +114,10 @@ import org.apache.lucene.analysis.CharArraySet;
                text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
                text:params (
                     [ text:paramName "defaultAnalyzer" ;
-                      text:paramType "analyzer" ;
+                      text:paramType text:TypeAnalyzer ;
                       text:paramValue [ a text:SimpleAnalyzer ] ]
                     [ text:paramName "maxShingleSize" ;
-                      text:paramType "int" ;
+                      text:paramType text:TypeInt ;
                       text:paramValue 3 ]
                     )
            ] .
@@ -130,71 +133,71 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                text:params (
                     [ text:paramName "stopwords" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("the" "a" "an") ]
                     [ text:paramName "stemExclusionSet" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("ing" "ed") ]
                     )
            ] .
      */
 
-    public static final String TYPE_ANALYZER = "analyzer";
-    public static final String TYPE_BOOL = "boolean";
-    public static final String TYPE_FILE = "file";
-    public static final String TYPE_INT = "int";
-    public static final String TYPE_SET = "set";
-    public static final String TYPE_STRING = "string";
+    public static final String TYPE_ANALYZER   = "TypeAnalyzer";
+    public static final String TYPE_BOOL       = "TypeBoolean";
+    public static final String TYPE_FILE       = "TypeFile";
+    public static final String TYPE_INT        = "TypeInt";
+    public static final String TYPE_SET        = "TypeSet";
+    public static final String TYPE_STRING     = "TypeString";
 
     @Override
-	public Analyzer open(Assembler a, Resource root, Mode mode) {
-	    if (root.hasProperty(TextVocab.pClass)) {
-	        // text:class is expected to be a string literal
-	        String className = root.getProperty(TextVocab.pClass).getString();
-
-	        // is the class accessible?
-	        Class<?> clazz = null;
-	        try {
-	            clazz = Class.forName(className);
-	        } catch (ClassNotFoundException e) {
-	            Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
-	            return null;
-	        }
-
-	        // Is the class an Analyzer?
-	        if (!Analyzer.class.isAssignableFrom(clazz)) {
-	            Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
-	            return null;
-	        }
-	        
-	        if (root.hasProperty(TextVocab.pParams)) {
-	            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
-	            if (! node.isResource()) {
-	                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
-	            }
-
-	            List<ParamSpec> specs = getParamSpecs((Resource) node);
-
-	            // split the param specs into classes and values for constructor lookup
-	            final Class<?> paramClasses[] = new Class<?>[specs.size()];
-	            final Object paramValues[] = new Object[specs.size()];
-	            for (int i = 0; i < specs.size(); i++) {
-	                ParamSpec spec = specs.get(i);
-	                paramClasses[i] = spec.getValueClass();
-	                paramValues[i] = spec.getValue();
-	            }
-
-	            // Create new analyzer
-	            return newAnalyzer(clazz, paramClasses, paramValues);
-
-	        } else {
-	            // use the nullary Analyzer constructor
-	            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
-	        }
-	    } else {
-	        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
-	    }
-	}
+    public Analyzer open(Assembler a, Resource root, Mode mode) {
+        if (root.hasProperty(TextVocab.pClass)) {
+            // text:class is expected to be a string literal
+            String className = root.getProperty(TextVocab.pClass).getString();
+
+            // is the class accessible?
+            Class<?> clazz = null;
+            try {
+                clazz = Class.forName(className);
+            } catch (ClassNotFoundException e) {
+                Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
+                return null;
+            }
+
+            // Is the class an Analyzer?
+            if (!Analyzer.class.isAssignableFrom(clazz)) {
+                Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
+                return null;
+            }
+
+            if (root.hasProperty(TextVocab.pParams)) {
+                RDFNode node = root.getProperty(TextVocab.pParams).getObject();
+                if (! node.isResource()) {
+                    throw new TextIndexException("text:params must be a list of parameter resources: " + node);
+                }
+
+                List<ParamSpec> specs = getParamSpecs((Resource) node);
+
+                // split the param specs into classes and values for constructor lookup
+                final Class<?> paramClasses[] = new Class<?>[specs.size()];
+                final Object paramValues[] = new Object[specs.size()];
+                for (int i = 0; i < specs.size(); i++) {
+                    ParamSpec spec = specs.get(i);
+                    paramClasses[i] = spec.getValueClass();
+                    paramValues[i] = spec.getValue();
+                }
+
+                // Create new analyzer
+                return newAnalyzer(clazz, paramClasses, paramValues);
+
+            } else {
+                // use the nullary Analyzer constructor
+                return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
+            }
+        } else {
+            throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
+        }
+    }
 
     /**
      * Create instance of the Lucene Analyzer, <code>class</code>, with provided parameters
@@ -221,47 +224,52 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
         return null;
     }
-    
+
     private List<ParamSpec> getParamSpecs(Resource list) {
         List<ParamSpec> result = new ArrayList<>();
         Resource current = list;
-        
+
         while (current != null && ! current.equals(RDF.nil)){
             Statement firstStmt = current.getProperty(RDF.first);
             if (firstStmt == null) {
                 throw new TextIndexException("parameter list not well formed: " + current);
             }
-            
+
             RDFNode first = firstStmt.getObject();
             if (! first.isResource()) {
                 throw new TextIndexException("parameter specification must be an anon resource : " + first);
             }
 
             result.add(getParamSpec((Resource) first));
-            
+
             Statement restStmt = current.getProperty(RDF.rest);
             if (restStmt == null) {
                 throw new TextIndexException("parameter list not terminated by rdf:nil");
             }
-            
+
             RDFNode rest = restStmt.getObject();
             if (! rest.isResource()) {
                 throw new TextIndexException("parameter list node is not a resource : " + rest);
             }
-            
+
             current = (Resource) rest;
         }
-        
+
         return result;
     }
-    
+
     private ParamSpec getParamSpec(Resource node) {
         Statement nameStmt = node.getProperty(TextVocab.pParamName);
         Statement typeStmt = node.getProperty(TextVocab.pParamType);
         Statement valueStmt = node.getProperty(TextVocab.pParamValue);
         
+        if (typeStmt == null) {
+            throw new TextIndexException("Parameter specification must have a text:paramType: " + node);
+        }        
+        Resource typeRes = typeStmt.getResource();
+        String type = typeRes.getLocalName();
+
         String name = getStringValue(nameStmt);
-        String type = getStringValue(typeStmt);
         String value = getStringValue(valueStmt);
 
         switch (type) {
@@ -274,7 +282,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
             return new ParamSpec(name, value, String.class);
         }
-        
+
         // java.io.FileReader
         case TYPE_FILE: {
 
@@ -291,23 +299,23 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
                 throw new TextIndexException("File " + value + " for param " + name + " not found!");
             }
         }
-        
+
         // org.apache.lucene.analysis.util.CharArraySet
         case TYPE_SET: {
             if (valueStmt == null) {
                 throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
             }
-            
+
             RDFNode valueNode = valueStmt.getObject();
             if (!valueNode.isResource()) {
                 throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode);
             }
-            
+
             List<String> values = toStrings((Resource) valueNode);
 
             return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
         }
-        
+
         // int
         case TYPE_INT:
             if (value == null) {
@@ -317,7 +325,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
             int n = ((Literal) valueStmt.getObject()).getInt();
             return new ParamSpec(name, n, int.class);
 
-        // boolean
+            // boolean
         case TYPE_BOOL:
             if (value == null) {
                 throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
@@ -325,21 +333,21 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
             boolean b = ((Literal) valueStmt.getObject()).getBoolean();
             return new ParamSpec(name, b, boolean.class);
-        
-        // org.apache.lucene.analysis.Analyzer
+
+            // org.apache.lucene.analysis.Analyzer
         case TYPE_ANALYZER:
             if (valueStmt == null) {
                 throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
             }
-            
+
             RDFNode valueNode = valueStmt.getObject();
             if (!valueNode.isResource()) {
                 throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
             }
-            
+
             Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
             return new ParamSpec(name, analyzer, Analyzer.class);
-        
+
         default:
             // there was no match
             Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
@@ -348,7 +356,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
         return null;
     }
-    
+
     private String getStringValue(Statement stmt) {
         if (stmt == null) {
             return null;
@@ -365,33 +373,33 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
     private List<String> toStrings(Resource list) {
         List<String> result = new ArrayList<>();
         Resource current = list;
-        
+
         while (current != null && ! current.equals(RDF.nil)){
             Statement firstStmt = current.getProperty(RDF.first);
             if (firstStmt == null) {
                 throw new TextIndexException("param spec of type set not well formed");
             }
-            
+
             RDFNode first = firstStmt.getObject();
             if (! first.isLiteral()) {
                 throw new TextIndexException("param spec of type set item is not a literal: " + first);
             }
-            
+
             result.add(((Literal)first).getLexicalForm());
-            
+
             Statement restStmt = current.getProperty(RDF.rest);
             if (restStmt == null) {
                 throw new TextIndexException("param spec of type set not terminated by rdf:nil");
             }
-            
+
             RDFNode rest = restStmt.getObject();
             if (! rest.isResource()) {
                 throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
             }
-            
+
             current = (Resource) rest;
         }
-        
+
         return result;
     }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index b051252..78cf0c0 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -89,6 +89,12 @@ public class TextVocab
     //GenericAnalyzer
     public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
     public static final Resource definedAnalyzer    = Vocab.resource(NS, "DefinedAnalyzer");
+    public static final Resource typeAnalyzer       = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_ANALYZER);
+    public static final Resource typeBoolean        = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_BOOL);
+    public static final Resource typeFile           = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_FILE);
+    public static final Resource typeInt            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_INT);
+    public static final Resource typeSet            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_SET);
+    public static final Resource typeString         = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_STRING);
     public static final Property pClass             = Vocab.property(NS, "class");
     public static final Property pParams            = Vocab.property(NS, "params");
     public static final Property pParamName         = Vocab.property(NS, "paramName");

http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 87c5d75..3effc39 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -67,13 +67,6 @@ public class TestGenericAnalyzerAssembler {
     
     private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
-    private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
-    private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
-    private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
-    private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
-    private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
-    private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
-    
     static {
         TextAssembler.init();
         Model model = ModelFactory.createDefaultModel();
@@ -106,7 +99,7 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "stopWords")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_SET)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeSet)
                                                   .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
                                           }))
                      ;
@@ -121,14 +114,14 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "defaultAnalyzer")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                   .addProperty(TextVocab.pParamValue, 
                                                                model.createResource()
                                                                .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                ),
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "maxShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 3)
                                           }))
                      ;
@@ -143,34 +136,34 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "delegate")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                   .addProperty(TextVocab.pParamValue, 
                                                                model.createResource()
                                                                .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                ) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "minShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 2) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "maxShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 4) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "tokenSeparator")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                   .addLiteral(TextVocab.pParamValue, "|") ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "outputUnigrams")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                   .addLiteral(TextVocab.pParamValue, false) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                   .addLiteral(TextVocab.pParamValue, true) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "fillerToken")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                   .addLiteral(TextVocab.pParamValue, "foo")
                                           }))
                      ;
@@ -185,7 +178,7 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "stopWords")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeFile)
                                                   .addProperty(TextVocab.pParamValue, FILE_STOPS)
                                           }))
                      ;


[18/18] jena git commit: JENA-1326: Merge commit 'refs/pull/246/head' of github.com:apache/jena

Posted by an...@apache.org.
JENA-1326: Merge commit 'refs/pull/246/head' of github.com:apache/jena

This closes #246.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/31fe2386
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/31fe2386
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/31fe2386

Branch: refs/heads/master
Commit: 31fe23868514541e668ec920802c4c26ab192f9e
Parents: eb4b5b6 ece2f41
Author: Andy Seaborne <an...@apache.org>
Authored: Wed Jun 28 17:06:44 2017 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Wed Jun 28 17:06:44 2017 +0100

----------------------------------------------------------------------
 jena-text/.gitignore                            |   1 +
 .../apache/jena/query/text/analyzer/Util.java   |  16 +
 .../assembler/DefineAnalyzersAssembler.java     | 105 +++++
 .../assembler/DefinedAnalyzerAssembler.java     |  51 +++
 .../assembler/GenericAnalyzerAssembler.java     | 440 +++++++++++++++++++
 .../query/text/assembler/TextAssembler.java     |   2 +
 .../assembler/TextIndexLuceneAssembler.java     |  18 +
 .../jena/query/text/assembler/TextVocab.java    |  25 ++
 .../org/apache/jena/query/text/TS_Text.java     |   2 +
 .../text/assembler/TestEntityMapAssembler.java  |   2 +
 .../assembler/TestGenericAnalyzerAssembler.java | 218 +++++++++
 .../assembler/TestTextDatasetAssembler.java     |   2 +
 .../assembler/TestTextIndexLuceneAssembler.java |   2 +
 jena-text/testing/some-stop-words.txt           |   6 +
 14 files changed, 890 insertions(+)
----------------------------------------------------------------------



[04/18] jena git commit: Merge remote-tracking branch 'apache/master' into generic-text-analyzers grabbing updates to jena-text

Posted by an...@apache.org.
Merge remote-tracking branch 'apache/master' into generic-text-analyzers
grabbing updates to jena-text


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/c429c1d8
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/c429c1d8
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/c429c1d8

Branch: refs/heads/master
Commit: c429c1d8e6fa7a7473001289985a9265fcc4ff37
Parents: 27ea30b a599e48
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Fri Apr 21 08:56:48 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Fri Apr 21 08:56:48 2017 -0500

----------------------------------------------------------------------
 .../jena/sparql/engine/http/HttpQuery.java      | 14 +++++-----
 .../sparql/engine/http/QueryExceptionHTTP.java  | 28 ++++++++++++++++----
 jena-parent/pom.xml                             |  4 ++-
 jena-text/pom.xml                               |  4 ++-
 .../org/apache/jena/query/text/TextIndexES.java |  4 ++-
 .../text/assembler/TextIndexESAssembler.java    |  1 -
 .../assembler/TextIndexLuceneAssembler.java     |  1 +
 .../jena/query/text/it/TextIndexESIT.java       |  4 +--
 8 files changed, 42 insertions(+), 18 deletions(-)
----------------------------------------------------------------------



[16/18] jena git commit: delete text-query.mdtext; update .gitignore; add license and JenaSystem.init()

Posted by an...@apache.org.
delete text-query.mdtext; update .gitignore; add license and
JenaSystem.init()

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/76c3ae3e
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/76c3ae3e
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/76c3ae3e

Branch: refs/heads/master
Commit: 76c3ae3e9f2f35303914258d92eba47eae8e8a75
Parents: 1d687f0
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Jun 28 08:22:52 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Jun 28 08:22:52 2017 -0500

----------------------------------------------------------------------
 jena-text/.gitignore                            |  2 +-
 .../assembler/TestGenericAnalyzerAssembler.java | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/76c3ae3e/jena-text/.gitignore
----------------------------------------------------------------------
diff --git a/jena-text/.gitignore b/jena-text/.gitignore
index f7b49ee..6c7b69a 100644
--- a/jena-text/.gitignore
+++ b/jena-text/.gitignore
@@ -1 +1 @@
-/text-query.mdtext
+.gitignore

http://git-wip-us.apache.org/repos/asf/jena/blob/76c3ae3e/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 3effc39..bcd5a12 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.jena.query.text.assembler;
 
 import static org.junit.Assert.assertEquals;
@@ -6,6 +24,7 @@ import org.apache.jena.rdf.model.Model;
 import org.apache.jena.rdf.model.ModelFactory;
 import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.vocabulary.RDF;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
@@ -68,6 +87,7 @@ public class TestGenericAnalyzerAssembler {
     private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
     static {
+        JenaSystem.init();
         TextAssembler.init();
         Model model = ModelFactory.createDefaultModel();
         


[13/18] jena git commit: ignore extras

Posted by an...@apache.org.
ignore extras

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/fef4d22f
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/fef4d22f
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/fef4d22f

Branch: refs/heads/master
Commit: fef4d22faeda09159cc2523e477571d1d23a85e7
Parents: cc57979
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 11:20:53 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 11:20:53 2017 -0500

----------------------------------------------------------------------
 jena-text/.gitignore | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/fef4d22f/jena-text/.gitignore
----------------------------------------------------------------------
diff --git a/jena-text/.gitignore b/jena-text/.gitignore
index e69de29..f7b49ee 100644
--- a/jena-text/.gitignore
+++ b/jena-text/.gitignore
@@ -0,0 +1 @@
+/text-query.mdtext


[08/18] jena git commit: ignore: organize imports

Posted by an...@apache.org.
ignore: organize imports

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/57ded6a9
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/57ded6a9
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/57ded6a9

Branch: refs/heads/master
Commit: 57ded6a9c1f7d275de4f8e6294611a869407534d
Parents: 94b41be
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 16:15:58 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 16:15:58 2017 -0500

----------------------------------------------------------------------
 .../text/assembler/TestGenericAnalyzerAssembler.java  | 14 --------------
 1 file changed, 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/57ded6a9/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 2ddfa31..87c5d75 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -1,35 +1,21 @@
 package org.apache.jena.query.text.assembler;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 
-import java.util.List;
-
-import org.apache.jena.assembler.Assembler;
-import org.apache.jena.atlas.logging.Log;
-import org.apache.jena.atlas.logging.LogCtl;
-import org.apache.jena.query.text.EntityDefinition;
-import org.apache.jena.query.text.TextIndexLucene;
 import org.apache.jena.rdf.model.Model;
 import org.apache.jena.rdf.model.ModelFactory;
-import org.apache.jena.rdf.model.Property;
 import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.vocabulary.RDF;
-import org.apache.jena.vocabulary.RDFS;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class TestGenericAnalyzerAssembler {
 
-    private static final String TESTBASE = "http://example.org/test/";
     private static final Resource spec1;
     private static final Resource spec2;
     private static final Resource spec3;


[09/18] jena git commit: added analyzer definitions: 1) DefinedAnalyzers for use in text:map; 2) add analyzers to Multilingual support based on BCP47 codes

Posted by an...@apache.org.
added analyzer definitions: 1) DefinedAnalyzers for use in text:map; 2)
add analyzers to Multilingual support based on BCP47 codes

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/a3bb8e41
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/a3bb8e41
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/a3bb8e41

Branch: refs/heads/master
Commit: a3bb8e41aeaf9be3540cf0a6be84cd9dc9b43b28
Parents: 57ded6a
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 16:31:54 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 16:31:54 2017 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/analyzer/Util.java   |   4 +
 .../assembler/DefinedAnalyzerAssembler.java     | 100 +++++++++++++++++++
 .../query/text/assembler/TextAssembler.java     |   1 +
 .../assembler/TextIndexLuceneAssembler.java     |  12 +++
 .../jena/query/text/assembler/TextVocab.java    |   5 +
 5 files changed, 122 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index fb2582a..20c7573 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -51,6 +51,10 @@ public class Util {
             return null;
         }
     }
+    
+    public static void addAnalyzer(String lang, Analyzer analyzer) {
+        cache.put(lang, analyzer);
+    }
 
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
new file mode 100644
index 0000000..e7bd941
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -0,0 +1,100 @@
+package org.apache.jena.query.text.assembler;
+
+import java.util.Hashtable;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefinedAnalyzerAssembler extends AssemblerBase {
+    
+    private static Hashtable<Resource, Analyzer> analyzers = new Hashtable<>();
+    
+    public static void addAnalyzer(Resource key, Analyzer analyzer) {
+        analyzers.put(key, analyzer);
+    }
+    
+    public static boolean addAnalyzers(Assembler a, Resource list) {
+        Resource current = list;
+        boolean isMultilingualSupport = false;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("parameter list not well formed: " + current);
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isResource()) {
+                throw new TextIndexException("parameter specification must be an anon resource : " + first);
+            }
+
+            // process the current list element to add an analyzer 
+            Resource adding = (Resource) first;
+            if (adding.hasProperty(TextVocab.pAnalyzer)) {
+                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
+                RDFNode analyzerNode = analyzerStmt.getObject();
+                if (!analyzerNode.isResource()) {
+                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+                }
+                
+                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
+                
+                if (adding.hasProperty(TextVocab.pAddLang)) {
+                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+                    String langCode = langStmt.getString();
+                    Util.addAnalyzer(langCode, analyzer);
+                    isMultilingualSupport = true;
+                }
+                
+                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
+                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
+                    Resource id = defStmt.getResource();
+                    
+                    if (id.getURI() != null) {
+                        DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
+                    } else {
+                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+                    }
+                }
+            } else {
+                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+            }
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("parameter list not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("parameter list node is not a resource : " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return isMultilingualSupport;
+    }
+   
+    @Override
+    public Object open(Assembler a, Resource root, Mode mode) {
+        
+        if (root.hasProperty(TextVocab.pUseAnalyzer)) {
+            Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
+            Resource key = useStmt.getResource();
+            
+            return analyzers.get(key);
+        }
+        
+        return null;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 45f5cee..6cbb2da 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -38,6 +38,7 @@ public class TextAssembler
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.genericAnalyzer,  new GenericAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.definedAnalyzer,  new DefinedAnalyzerAssembler()) ;
 
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 0ec1e5b..7acfb9e 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -115,6 +115,18 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
                 }
                 isMultilingualSupport = mlsNode.asLiteral().getBoolean();
             }
+            
+            Statement defAnalyzersStatement = root.getProperty(pDefAnalyzers);
+            if (null != defAnalyzersStatement) {
+                RDFNode aNode = defAnalyzersStatement.getObject();
+                if (! aNode.isResource()) {
+                    throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+                }
+                boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+                if (addedLangs) {
+                    isMultilingualSupport = true;
+                }
+            }
 
             boolean storeValues = false;
             Statement storeValuesStatement = root.getProperty(pStoreValues);

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index cd1844d..b051252 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -88,10 +88,15 @@ public class TextVocab
 
     //GenericAnalyzer
     public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
+    public static final Resource definedAnalyzer    = Vocab.resource(NS, "DefinedAnalyzer");
     public static final Property pClass             = Vocab.property(NS, "class");
     public static final Property pParams            = Vocab.property(NS, "params");
     public static final Property pParamName         = Vocab.property(NS, "paramName");
     public static final Property pParamType         = Vocab.property(NS, "paramType");
     public static final Property pParamValue        = Vocab.property(NS, "paramValue");
+    public static final Property pDefAnalyzers      = Vocab.property(NS, "defineAnalyzers");
+    public static final Property pDefAnalyzer       = Vocab.property(NS, "defineAnalyzer");
+    public static final Property pAddLang           = Vocab.property(NS, "addLang");
+    public static final Property pUseAnalyzer       = Vocab.property(NS, "useAnalyzer");
 }
 


[07/18] jena git commit: added more tests

Posted by an...@apache.org.
added more tests

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/94b41be7
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/94b41be7
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/94b41be7

Branch: refs/heads/master
Commit: 94b41be7553a4f955c0e41c868d94662bdd7236e
Parents: d2f0561
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 12:29:47 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 12:29:47 2017 -0500

----------------------------------------------------------------------
 .../assembler/TestGenericAnalyzerAssembler.java | 121 +++++++++++++++++--
 jena-text/testing/some-stop-words.txt           |   6 +
 2 files changed, 118 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/94b41be7/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index e2a4893..2ddfa31 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -20,21 +20,22 @@ import org.apache.jena.vocabulary.RDFS;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class TestGenericAnalyzerAssembler {
-    
-//    // Suppress warnings
-//    @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); }
-//    @AfterClass  public static void afterClass()  { LogCtl.setInfo(EntityDefinitionAssembler.class); }
 
     private static final String TESTBASE = "http://example.org/test/";
     private static final Resource spec1;
     private static final Resource spec2;
     private static final Resource spec3;
+    private static final Resource spec4;
+    private static final Resource spec5;
+    private static final Resource spec6;
     
     @Test public void AnalyzerNullaryCtor() {
         GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
@@ -54,15 +55,38 @@ public class TestGenericAnalyzerAssembler {
         assertEquals(FrenchAnalyzer.class, analyzer.getClass());
     }
     
+    @Test public void AnalyzerCtorAnalyzerInt() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec4, null);
+        assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerCtorShingle7() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec5, null);
+        assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerCtorFile() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec6, null);
+        assertEquals(StopAnalyzer.class, analyzer.getClass());
+    }
+    
     
     private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer";
     private static final String CLASS_FRENCH = "org.apache.lucene.analysis.fr.FrenchAnalyzer";
+    private static final String CLASS_SHINGLE = "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper";
+    private static final String CLASS_STOP = "org.apache.lucene.analysis.core.StopAnalyzer";
+    
+    private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
-    private static final String PARAM_TYPE_BOOL = "boolean";
-    private static final String PARAM_TYPE_FILE = "file";
-    private static final String PARAM_TYPE_INT = "int";
-    private static final String PARAM_TYPE_SET = "set";
-    private static final String PARAM_TYPE_STRING = "string";
+    private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
+    private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
+    private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
+    private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
+    private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
+    private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
     
     static {
         TextAssembler.init();
@@ -100,6 +124,85 @@ public class TestGenericAnalyzerAssembler {
                                                   .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
                                           }))
                      ;
+        
+        // analyzer spec w/ analyzer param and int
+                
+        spec4 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "defaultAnalyzer")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamValue, 
+                                                               model.createResource()
+                                                               .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+                                                               ),
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "maxShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 3)
+                                          }))
+                     ;
+        
+        // analyzer spec w/ seven params of mixed types
+                
+        spec5 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "delegate")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamValue, 
+                                                               model.createResource()
+                                                               .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+                                                               ) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "minShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 2) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "maxShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 4) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "tokenSeparator")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addLiteral(TextVocab.pParamValue, "|") ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "outputUnigrams")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addLiteral(TextVocab.pParamValue, false) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addLiteral(TextVocab.pParamValue, true) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "fillerToken")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addLiteral(TextVocab.pParamValue, "foo")
+                                          }))
+                     ;
+        
+        // analyzer spec w/ one file param
+                
+        spec6 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_STOP)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "stopWords")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+                                                  .addProperty(TextVocab.pParamValue, FILE_STOPS)
+                                          }))
+                     ;
     }
     
     private static Resource strs2list(Model model, String string) {

http://git-wip-us.apache.org/repos/asf/jena/blob/94b41be7/jena-text/testing/some-stop-words.txt
----------------------------------------------------------------------
diff --git a/jena-text/testing/some-stop-words.txt b/jena-text/testing/some-stop-words.txt
new file mode 100644
index 0000000..e648d66
--- /dev/null
+++ b/jena-text/testing/some-stop-words.txt
@@ -0,0 +1,6 @@
+foo
+bar
+baz
+flip
+flop
+mop
\ No newline at end of file


[15/18] jena git commit: Merge remote-tracking branch 'apache/master' into generic-text-analyzers

Posted by an...@apache.org.
Merge remote-tracking branch 'apache/master' into generic-text-analyzers

# Conflicts:
#	jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1d687f01
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1d687f01
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1d687f01

Branch: refs/heads/master
Commit: 1d687f01deb634adc6882b119510901cec73402f
Parents: 6ac75fe eb4b5b6
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Tue Jun 27 10:51:13 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Tue Jun 27 10:51:13 2017 -0500

----------------------------------------------------------------------
 apache-jena/bin/tdbloader2common                |   24 +-
 .../arq/examples/riot/ExRIOT_2.java             |    8 -
 .../arq/examples/riot/ExRIOT_4.java             |    6 +-
 .../arq/examples/riot/ExRIOT_6.java             |    5 +-
 .../arq/examples/riot/ExRIOT_7.java             |    4 +-
 .../apache/jena/atlas/web/TypedInputStream.java |    6 +-
 .../java/org/apache/jena/query/Dataset.java     |    3 +
 .../java/org/apache/jena/riot/RDFDataMgr.java   |  139 +-
 .../java/org/apache/jena/riot/RDFParser.java    |   15 +-
 .../org/apache/jena/riot/RDFParserBuilder.java  |   20 +-
 .../org/apache/jena/riot/RDFParserRegistry.java |   14 +-
 .../java/org/apache/jena/riot/WebContent.java   |   11 +-
 .../apache/jena/riot/lang/ReaderRIOTCSV.java    |    2 +-
 .../org/apache/jena/riot/lang/RiotParsers.java  |    4 +-
 .../jena/riot/system/ErrorHandlerFactory.java   |  132 +-
 .../jena/riot/system/ParserProfileStd.java      |    4 +
 .../org/apache/jena/riot/system/RiotLib.java    |    2 +-
 .../org/apache/jena/riot/tokens/TokenType.java  |    2 +-
 .../java/org/apache/jena/riot/web/HttpOp.java   |    4 +-
 .../apache/jena/riot/web/HttpResponseLib.java   |    7 +-
 .../apache/jena/riot/writer/TurtleShell.java    |    2 +-
 .../jena/sparql/algebra/optimize/Optimize.java  |    4 +-
 .../apache/jena/sparql/core/DatasetGraph.java   |    7 +
 .../jena/sparql/core/DatasetGraphBase.java      |   10 +
 .../jena/sparql/core/DatasetGraphBaseFind.java  |   18 +-
 .../jena/sparql/core/DatasetGraphWrapper.java   |    4 +
 .../apache/jena/sparql/core/DatasetImpl.java    |    5 +
 .../java/org/apache/jena/sparql/core/Quad.java  |   12 +-
 .../org/apache/jena/sparql/core/Substitute.java |    4 +-
 .../sparql/core/mem/DatasetGraphInMemory.java   |    4 +-
 .../jena/sparql/engine/iterator/QueryIter1.java |    6 +-
 .../sparql/engine/iterator/QueryIterRoot.java   |   20 +-
 .../jena/sparql/engine/main/JoinClassifier.java |    4 +-
 .../jena/sparql/engine/main/OpExecutor.java     |    2 +-
 .../sparql/engine/main/QueryEngineMain.java     |    6 +-
 .../jena/sparql/engine/main/VarFinder.java      |    9 +-
 .../org/apache/jena/sparql/engine/ref/Eval.java |    4 +-
 .../jena/sparql/engine/ref/EvaluatorSimple.java |    4 +-
 .../jena/sparql/expr/E_FunctionDynamic.java     |    2 +-
 .../org/apache/jena/sparql/expr/E_Regex.java    |   32 +-
 .../org/apache/jena/sparql/expr/NodeValue.java  |   61 +-
 .../apache/jena/sparql/expr/RegexEngine.java    |    2 +-
 .../org/apache/jena/sparql/expr/RegexJava.java  |   19 +-
 .../apache/jena/sparql/expr/RegexXerces.java    |    5 +
 .../sparql/expr/ValueSpaceClassification.java   |    2 +-
 .../sparql/expr/nodevalue/NodeFunctions.java    |    6 +
 .../sparql/expr/nodevalue/NodeValueSortKey.java |  122 +
 .../sparql/expr/nodevalue/NodeValueVisitor.java |    1 +
 .../jena/sparql/function/StandardFunctions.java |    3 +
 .../sparql/function/library/FN_Collation.java   |   64 +
 .../jena/sparql/graph/GraphUnionRead.java       |   13 +-
 .../jena/sparql/modify/UpdateEngineWorker.java  |   28 +-
 .../sparql/modify/request/UpdateWriter.java     |    2 +
 .../modify/request/UpdateWriterVisitor.java     |   10 +-
 .../jena/sparql/path/eval/PathEngine.java       |    2 +-
 .../jena/sparql/pfunction/library/strSplit.java |   68 +-
 .../jena/sparql/serializer/FmtEltLib.java       |  195 +
 .../sparql/serializer/FormatterElement.java     |  688 +-
 .../sparql/serializer/TriplesListBlock.java     |   43 +
 .../jena/sparql/syntax/ElementPathBlock.java    |    2 +-
 .../jena/sparql/syntax/ElementTriplesBlock.java |    2 +-
 .../org/apache/jena/sparql/util/VarUtils.java   |    8 +-
 .../apache/jena/sparql/util/graph/Findable.java |   19 +-
 .../sparql/util/graph/FindableBasicPattern.java |   89 -
 .../sparql/util/graph/FindableCollection.java   |   68 +
 .../apache/jena/sparql/util/graph/GNode.java    |    8 +-
 .../org/apache/jena/riot/TestRDFParser.java     |   11 +
 .../jena/riot/lang/TestParserFactory.java       |    4 +-
 .../jena/riot/lang/TestPipedRDFIterators.java   |    6 +-
 .../org/apache/jena/riot/lang/TestTriXBad.java  |    4 +-
 .../java/org/apache/jena/sparql/TC_General.java |    2 +
 .../jena/sparql/algebra/TestVarFinder.java      |    5 +
 .../org/apache/jena/sparql/api/TestAPI.java     |   12 +
 .../sparql/core/AbstractDatasetGraphFind.java   |   53 +-
 .../mem/TestDatasetGraphInMemoryPromote.java    |    5 +
 .../jena/sparql/expr/TestE_FunctionDynamic.java |   36 +
 .../apache/jena/sparql/expr/TestNodeValue.java  |  124 +
 .../org/apache/jena/sparql/expr/TestRegex.java  |   63 +-
 .../expr/nodevalue/TestNodeFunctions.java       |   40 +
 .../expr/nodevalue/TestNodeValueSortKey.java    |   86 +
 .../function/library/TestFunctionCollation.java |   70 +
 .../sparql/pfunction/library/TS_PFunction.java  |   30 +
 .../sparql/pfunction/library/TestStrSplit.java  |  158 +
 .../transaction/AbstractTestTransPromote.java   |  116 +-
 .../apache/jena/atlas/io/IndentedWriter.java    |    1 -
 .../java/org/apache/jena/atlas/lib/Creator.java |    4 +-
 .../java/arq/cmdline/ModDatasetAssembler.java   |    4 +
 jena-cmds/src/main/java/arq/uparse.java         |   70 +-
 .../src/main/java/riotcmd/CmdLangParse.java     |   61 +-
 jena-cmds/src/main/java/riotcmd/riot.java       |   12 +-
 jena-cmds/src/main/java/tdb/tools/dumpbpt.java  |   34 +-
 .../src/main/java/tdb/tools/dumpjournal.java    |   81 +
 .../src/main/java/tdb/tools/dumpnodetable.java  |   68 +-
 .../src/main/java/tdb/tools/dumpnodetable1.java |  184 +
 .../main/java/org/apache/jena/graph/Graph.java  |   20 +-
 .../org/apache/jena/rdf/model/impl/Util.java    |    4 +
 .../apache/jena/hadoop/rdf/stats/RdfStats.java  |   13 +-
 jena-extras/jena-querybuilder/pom.xml           |    4 +
 .../arq/querybuilder/AbstractQueryBuilder.java  |   17 +-
 .../jena/arq/querybuilder/AskBuilder.java       |   26 +-
 .../jena/arq/querybuilder/ConstructBuilder.java |   25 +-
 .../jena/arq/querybuilder/DescribeBuilder.java  |  316 +
 .../jena/arq/querybuilder/ExprFactory.java      | 1783 ++++
 .../jena/arq/querybuilder/SelectBuilder.java    |   47 +-
 .../jena/arq/querybuilder/UpdateBuilder.java    | 1027 ++
 .../arq/querybuilder/clauses/PrologClause.java  |    8 +
 .../arq/querybuilder/clauses/SelectClause.java  |   22 +-
 .../clauses/SolutionModifierClause.java         |   14 +
 .../arq/querybuilder/clauses/WhereClause.java   |   99 +-
 .../arq/querybuilder/handlers/HandlerBlock.java |    2 +-
 .../querybuilder/handlers/PrologHandler.java    |    5 +
 .../arq/querybuilder/handlers/WhereHandler.java |  231 +-
 .../rewriters/NodeValueRewriter.java            |    5 +
 .../updatebuilder/PrefixHandler.java            |  115 +
 .../updatebuilder/QBQuadHolder.java             |   79 +
 .../querybuilder/updatebuilder/QuadHolder.java  |   46 +
 .../updatebuilder/QuadIteratorBuilder.java      |  180 +
 .../updatebuilder/SingleQuadHolder.java         |   97 +
 .../updatebuilder/WhereProcessor.java           |  433 +
 .../jena/arq/AbstractRegexpBasedTest.java       |    4 +
 .../querybuilder/AbstractQueryBuilderTest.java  |   18 +-
 .../jena/arq/querybuilder/AskBuilderTest.java   |   42 +-
 .../arq/querybuilder/ConstructBuilderTest.java  |   41 +-
 .../DescribeBuilderContractTest.java            |   53 +
 .../jena/arq/querybuilder/ExprFactoryTest.java  |  701 ++
 .../arq/querybuilder/SelectBuilderTest.java     |   98 +-
 .../querybuilder/UpdateBuilderExampleTests.java |  625 ++
 .../arq/querybuilder/UpdateBuilderTest.java     |  411 +
 .../jena/arq/querybuilder/WhereValidator.java   |  205 +
 .../querybuilder/clauses/SelectClauseTest.java  |   70 +-
 .../querybuilder/clauses/WhereClauseTest.java   |  203 +-
 .../querybuilder/handlers/WhereHandlerTest.java |  337 +-
 jena-fuseki1/pom.xml                            |    2 +-
 .../jena/fuseki/migrate/GraphLoadUtils.java     |    8 +-
 jena-fuseki2/jena-fuseki-basic/pom.xml          |  256 +
 jena-fuseki2/jena-fuseki-basic/run-sparqler     |    8 +
 .../jena-fuseki-basic/sparqler/data/books.ttl   |   46 +
 .../jena-fuseki-basic/sparqler/data/empty.nt    |    1 +
 .../sparqler/data/sparql-data.ttl               |  101 +
 .../sparqler/data/underground.ttl               | 8996 ++++++++++++++++++
 .../sparqler/log4j-foreground.properties        |   44 +
 .../sparqler/log4j-server.properties            |   44 +
 .../jena-fuseki-basic/sparqler/pages/D.nt       |    0
 .../jena-fuseki-basic/sparqler/pages/D.rdf      |    8 +
 .../jena-fuseki-basic/sparqler/pages/D.ttl      |    3 +
 .../sparqler/pages/crossdomain.xml              |    3 +
 .../sparqler/pages/data-validator.html          |   48 +
 .../jena-fuseki-basic/sparqler/pages/doc.css    |  127 +
 .../jena-fuseki-basic/sparqler/pages/fuseki.css |  148 +
 .../jena-fuseki-basic/sparqler/pages/index.html |   73 +
 .../sparqler/pages/iri-validator.html           |   22 +
 .../sparqler/pages/query-validator.html         |   71 +
 .../jena-fuseki-basic/sparqler/pages/query.html |   63 +
 .../jena-fuseki-basic/sparqler/pages/robots.txt |    2 +
 .../sparqler/pages/sparql.html                  |   51 +
 .../sparqler/pages/update-validator.html        |   46 +
 .../sparqler/pages/update.html                  |   27 +
 .../sparqler/pages/validator.html               |   35 +
 .../sparqler/pages/xml-to-html-links.xsl        |  183 +
 .../sparqler/pages/xml-to-html-plain.xsl        |  187 +
 .../sparqler/pages/xml-to-html.xsl              |  187 +
 .../jena-fuseki-basic/sparqler/run-sparqler     |   64 +
 .../apache/jena/fuseki/cmds/FusekiBasicCmd.java |  485 +
 .../apache/jena/fuseki/cmds/PlatformInfo.java   |  122 +
 jena-fuseki2/jena-fuseki-core/pom.xml           |   13 -
 .../java/org/apache/jena/fuseki/Fuseki.java     |    4 +-
 .../java/org/apache/jena/fuseki/FusekiLib.java  |   13 +-
 .../org/apache/jena/fuseki/build/Template.java  |    1 +
 .../org/apache/jena/fuseki/cmd/FusekiCmd.java   |   28 +-
 .../apache/jena/fuseki/mgt/ActionDatasets.java  |   22 +-
 .../jena/fuseki/migrate/GraphLoadUtils.java     |    8 +-
 .../apache/jena/fuseki/server/FusekiEnv.java    |   13 +-
 .../apache/jena/fuseki/server/FusekiInfo.java   |  160 +
 .../jena/fuseki/server/FusekiInitialConfig.java |   61 +
 .../apache/jena/fuseki/server/FusekiServer.java |    4 +-
 .../fuseki/server/FusekiServerListener.java     |    5 +-
 .../jena/fuseki/server/ServerInitialConfig.java |   56 -
 .../jena/fuseki/servlets/SPARQL_GSP_RW.java     |    7 +-
 .../fuseki/servlets/SPARQL_QueryGeneral.java    |    4 +-
 .../jena/fuseki/validation/DataValidator.java   |  113 +-
 .../jena/fuseki/validation/IRIValidator.java    |  155 +-
 .../jena/fuseki/validation/QueryValidator.java  |  138 +-
 .../jena/fuseki/validation/UpdateValidator.java |   75 +-
 .../fuseki/validation/ValidationAction.java     |   95 -
 .../jena/fuseki/validation/ValidationError.java |   24 -
 .../jena/fuseki/validation/ValidatorBase.java   |  176 +
 .../fuseki/validation/ValidatorBaseJson.java    |   25 +-
 .../validation/html/DataValidatorHTML.java      |  220 +
 .../validation/html/IRIValidatorHTML.java       |  103 +
 .../validation/html/QueryValidatorHTML.java     |  234 +
 .../validation/html/UpdateValidatorHTML.java    |  142 +
 .../validation/html/ValidatorHtmlLib.java       |  117 +
 .../validation/json/DataValidatorJSON.java      |   94 +
 .../validation/json/IRIValidatorJSON.java       |  104 +
 .../validation/json/QueryValidatorJSON.java     |  157 +
 .../validation/json/UpdateValidatorJSON.java    |   94 +
 .../validation/json/ValidationAction.java       |   95 +
 .../fuseki/validation/json/ValidationError.java |   24 +
 .../validation/json/ValidatorJsonLib.java       |   85 +
 .../src/main/webapp/WEB-INF/web.xml             |   45 +-
 .../java/org/apache/jena/fuseki/ServerCtl.java  |   25 +-
 .../java/org/apache/jena/fuseki/TestAuth.java   |    3 +-
 .../org/apache/jena/fuseki/TestDatasetOps.java  |   18 +-
 jena-fuseki2/jena-fuseki-embedded/pom.xml       |   32 +-
 .../fuseki/embedded/FusekiEmbeddedServer.java   |  125 +-
 .../jena/fuseki/embedded/FusekiTestAuth.java    |  191 +
 .../jena/fuseki/embedded/FusekiTestServer.java  |  123 +-
 .../jena/fuseki/embedded/TS_EmbeddedFuseki.java |    2 +
 .../fuseki/embedded/TestEmbeddedFuseki.java     |   43 +-
 .../fuseki/embedded/TestFusekiTestAuth.java     |  100 +
 .../fuseki/embedded/TestFusekiTestServer.java   |   60 +
 .../fuseki/embedded/TestMultipleEmbedded.java   |   16 +-
 .../testing/FusekiEmbedded/test.txt             |    1 +
 jena-fuseki2/pom.xml                            |    4 +-
 jena-integration-tests/pom.xml                  |    2 +-
 .../rdfconnection/TestRDFConnectionRemote.java  |    1 -
 jena-jdbc/jena-jdbc-driver-bundle/pom.xml       |    4 +-
 jena-jdbc/jena-jdbc-driver-remote/pom.xml       |  164 +-
 .../jena/jdbc/remote/TS_JdbcDriverRemote.java   |    6 +-
 .../TestRemoteEndpointConnection.java           |   16 +-
 .../TestRemoteEndpointConnectionWithAuth.java   |   57 +-
 ...stRemoteEndpointConnectionWithGraphUris.java |   16 +-
 ...oteEndpointConnectionWithResultSetTypes.java |   16 +-
 .../metadata/TestRemoteConnectionMetadata.java  |   12 +-
 .../results/TestRemoteEndpointResults.java      |   18 +-
 .../TestRemoteEndpointResultsWithAuth.java      |   49 +-
 .../TestRemoteEndpointResultsWithGraphUris.java |   18 +-
 ...RemoteEndpointResultsWithResultSetTypes.java |   18 +-
 .../TestRemoteEndpointStatements.java           |   12 +-
 jena-parent/pom.xml                             |   15 +-
 .../example/readonly/ReadOnlyEval.java          |  104 +
 .../example/readonly/package-info.java          |   22 +
 .../graph/SecuredGraphEventManager.java         |    6 +-
 .../apache/jena/sdb/engine/QueryEngineSDB.java  |    2 +-
 .../jena/tdb/base/block/BlockMgrCache.java      |   10 +-
 .../base/recordbuffer/RecordBufferPageMgr.java  |    5 +-
 .../jena/tdb/setup/DatasetBuilderStd.java       |    4 +-
 .../jena/tdb/transaction/BlockMgrJournal.java   |   15 +-
 .../transaction/DatasetGraphTransaction.java    |   12 +-
 .../apache/jena/tdb/transaction/Journal.java    |    6 -
 .../jena/tdb/transaction/JournalControl.java    |   18 +-
 .../jena/tdb/transaction/JournalEntry.java      |   19 +-
 .../jena/tdb/transaction/NodeTableTrans.java    |    2 +-
 .../jena/tdb/transaction/Transaction.java       |   15 +-
 .../tdb/transaction/TransactionManager.java     |    2 +
 .../tdb/transaction/AbstractTestTransSeq.java   |    3 +-
 .../tdb/transaction/TestTransPromoteTDB.java    |    6 +
 jena-text-es/pom.xml                            |    2 +-
 .../jena/query/text/es/InitJenaTextES.java      |    9 +-
 .../org/apache/jena/query/text/es/TextES.java   |   47 +
 .../apache/jena/query/text/es/TextIndexES.java  |    2 +-
 .../apache/jena/query/text/es/TextVocabES.java  |   35 +
 .../text/es/assembler/TextAssemblerES.java      |   37 +
 .../text/es/assembler/TextIndexESAssembler.java |    5 +-
 jena-text-es/src/main/resources/META-INF/NOTICE |    2 +-
 .../apache/jena/query/text/InitJenaText.java    |   11 +
 jena-text/src/main/resources/text-config-es.ttl |   64 -
 257 files changed, 24322 insertions(+), 2356 deletions(-)
----------------------------------------------------------------------



[12/18] jena git commit: Merge remote-tracking branch 'apache/master' into generic-text-analyzers merging jena-text-es changes

Posted by an...@apache.org.
Merge remote-tracking branch 'apache/master' into generic-text-analyzers
merging jena-text-es changes


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/cc579790
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/cc579790
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/cc579790

Branch: refs/heads/master
Commit: cc579790631d92499ef44717e540ceaefcfe1d89
Parents: 5edb6c8 ea5e358
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 11:17:20 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 11:17:20 2017 -0500

----------------------------------------------------------------------
 .../apache/jena/query/ResultSetCloseable.java   |  2 +-
 .../jena/sparql/engine/ResultSetWrapper.java    | 53 ++-----------------
 .../jena/sparql/resultset/ResultSetMem.java     |  3 +-
 .../jena/sparql/resultset/ResultSetWrapper.java |  5 +-
 .../syntaxtransform/QueryTransformOps.java      | 23 +++++++--
 .../syntaxtransform/TransformElementLib.java    |  9 ++++
 .../syntaxtransform/UpdateTransformOps.java     | 24 ++++++++-
 .../org/apache/jena/sparql/util/ModelUtils.java | 36 +++++++++----
 .../syntaxtransform/TestSyntaxTransform.java    | 54 +++++++++++++++++---
 9 files changed, 132 insertions(+), 77 deletions(-)
----------------------------------------------------------------------



[05/18] jena git commit: adding GenericAnalyzer tests

Posted by an...@apache.org.
adding GenericAnalyzer tests

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/8f1fa7cc
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/8f1fa7cc
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/8f1fa7cc

Branch: refs/heads/master
Commit: 8f1fa7ccbf2cb05f2eed121831c39e07260ec18b
Parents: c429c1d
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Fri Apr 21 16:02:20 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Fri Apr 21 16:02:20 2017 -0500

----------------------------------------------------------------------
 .../assembler/GenericAnalyzerAssembler.java     |   2 +-
 .../org/apache/jena/query/text/TS_Text.java     |   2 +
 .../assembler/TestGenericAnalyzerAssembler.java | 116 +++++++++++++++++++
 3 files changed, 119 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/8f1fa7cc/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 7fb04cc..0fa706e 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -167,7 +167,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 	            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
 	        }
 	    } else {
-	        throw new TextIndexException("text:class property is required by GenericAnalyzer");
+	        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
 	    }
 	}
 

http://git-wip-us.apache.org/repos/asf/jena/blob/8f1fa7cc/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 7259b11..91663f2 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -21,6 +21,7 @@ package org.apache.jena.query.text;
 import org.apache.jena.query.text.assembler.TestEntityMapAssembler;
 import org.apache.jena.query.text.assembler.TestTextDatasetAssembler;
 import org.apache.jena.query.text.assembler.TestTextIndexLuceneAssembler;
+import org.apache.jena.query.text.assembler.TestGenericAnalyzerAssembler;
 import org.junit.runner.RunWith;
 import org.junit.runners.Suite;
 import org.junit.runners.Suite.SuiteClasses;
@@ -47,6 +48,7 @@ import org.junit.runners.Suite.SuiteClasses;
     , TestDatasetWithConfigurableAnalyzer.class
     , TestDatasetWithAnalyzingQueryParser.class
     , TestDatasetWithComplexPhraseQueryParser.class
+    , TestGenericAnalyzerAssembler.class
 })
 
 public class TS_Text

http://git-wip-us.apache.org/repos/asf/jena/blob/8f1fa7cc/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
new file mode 100644
index 0000000..e2a4893
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -0,0 +1,116 @@
+package org.apache.jena.query.text.assembler;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.logging.Log;
+import org.apache.jena.atlas.logging.LogCtl;
+import org.apache.jena.query.text.EntityDefinition;
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Property;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.jena.vocabulary.RDFS;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestGenericAnalyzerAssembler {
+    
+//    // Suppress warnings
+//    @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); }
+//    @AfterClass  public static void afterClass()  { LogCtl.setInfo(EntityDefinitionAssembler.class); }
+
+    private static final String TESTBASE = "http://example.org/test/";
+    private static final Resource spec1;
+    private static final Resource spec2;
+    private static final Resource spec3;
+    
+    @Test public void AnalyzerNullaryCtor() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec1, null);
+        assertEquals(SimpleAnalyzer.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerNullaryCtor2() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec2, null);
+        assertEquals(FrenchAnalyzer.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerCtorSet1() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec3, null);
+        assertEquals(FrenchAnalyzer.class, analyzer.getClass());
+    }
+    
+    
+    private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer";
+    private static final String CLASS_FRENCH = "org.apache.lucene.analysis.fr.FrenchAnalyzer";
+    
+    private static final String PARAM_TYPE_BOOL = "boolean";
+    private static final String PARAM_TYPE_FILE = "file";
+    private static final String PARAM_TYPE_INT = "int";
+    private static final String PARAM_TYPE_SET = "set";
+    private static final String PARAM_TYPE_STRING = "string";
+    
+    static {
+        TextAssembler.init();
+        Model model = ModelFactory.createDefaultModel();
+        
+        // analyzer spec w/ no params
+                
+        spec1 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SIMPLE)
+                     ;
+        
+        // analyzer spec w/ empty params
+                
+        spec2 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_FRENCH)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { } )
+                                  )
+                     ;
+        
+        // analyzer spec w/ one set param
+                
+        spec3 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_FRENCH)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "stopWords")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_SET)
+                                                  .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
+                                          }))
+                     ;
+    }
+    
+    private static Resource strs2list(Model model, String string) {
+        String[] members = string.split("\\s");
+        Resource current = RDF.nil;
+        for (int i = members.length-1; i>=0; i--) {
+            Resource previous = current;
+            current = model.createResource();
+            current.addProperty(RDF.rest, previous);
+            current.addProperty(RDF.first, members[i]);            
+        }
+        return current;    
+    }
+}


[06/18] jena git commit: added parameters of type org.apache.lucene.analysis.Analyzer

Posted by an...@apache.org.
added parameters of type org.apache.lucene.analysis.Analyzer

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/d2f0561b
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/d2f0561b
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/d2f0561b

Branch: refs/heads/master
Commit: d2f0561b99c957658261b3693e4a89892369a65a
Parents: 8f1fa7c
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 12:29:04 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 12:29:04 2017 -0500

----------------------------------------------------------------------
 .../assembler/GenericAnalyzerAssembler.java     | 61 ++++++++++++++++----
 1 file changed, 50 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/d2f0561b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 0fa706e..853fcb6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -48,6 +48,7 @@ import org.apache.lucene.analysis.CharArraySet;
  *     file      java.io.FileReader
  *     int       int
  *     boolean   boolean
+ *     analyzer  org.apache.lucene.analysis.Analyzer
  * </pre>
  * 
  * Although the list of types is not exhaustive it is a simple matter
@@ -83,7 +84,7 @@ import org.apache.lucene.analysis.CharArraySet;
  * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
  * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
  * <p>
- * Example:
+ * Examples:
  * <pre>
     text:map (
          [ text:field "text" ; 
@@ -101,6 +102,23 @@ import org.apache.lucene.analysis.CharArraySet;
                     )
            ] .
  * </pre>
+ * <pre>
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:GenericAnalyzer ;
+               text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
+               text:params (
+                    [ text:paramName "defaultAnalyzer" ;
+                      text:paramType "analyzer" ;
+                      text:paramValue [ a text:SimpleAnalyzer ] ]
+                    [ text:paramName "maxShingleSize" ;
+                      text:paramType "int" ;
+                      text:paramValue 3 ]
+                    )
+           ] .
+ * </pre>
  */
 public class GenericAnalyzerAssembler extends AssemblerBase {
     /*
@@ -121,7 +139,14 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
            ] .
      */
 
-	@Override
+    public static final String TYPE_ANALYZER = "analyzer";
+    public static final String TYPE_BOOL = "boolean";
+    public static final String TYPE_FILE = "file";
+    public static final String TYPE_INT = "int";
+    public static final String TYPE_SET = "set";
+    public static final String TYPE_STRING = "string";
+
+    @Override
 	public Analyzer open(Assembler a, Resource root, Mode mode) {
 	    if (root.hasProperty(TextVocab.pClass)) {
 	        // text:class is expected to be a string literal
@@ -242,7 +267,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
         switch (type) {
 
         // String
-        case "string": {
+        case TYPE_STRING: {
             if (value == null) {
                 throw new TextIndexException("Value for string param: " + name + " must not be empty!");
             }
@@ -250,8 +275,8 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
             return new ParamSpec(name, value, String.class);
         }
         
-        // "java.io.FileReader":
-        case "file": {
+        // java.io.FileReader
+        case TYPE_FILE: {
 
             if (value == null) {
                 throw new TextIndexException("Value for file param must exist and must contain a file name.");
@@ -267,8 +292,8 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
             }
         }
         
-        // "org.apache.lucene.analysis.util.CharArraySet":
-        case "set": {
+        // org.apache.lucene.analysis.util.CharArraySet
+        case TYPE_SET: {
             if (valueStmt == null) {
                 throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
             }
@@ -283,8 +308,8 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
             return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
         }
         
-        // "int":
-        case "int":
+        // int
+        case TYPE_INT:
             if (value == null) {
                 throw new TextIndexException("Value for int param: " + name + " must not be empty!");
             }
@@ -292,8 +317,8 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
             int n = ((Literal) valueStmt.getObject()).getInt();
             return new ParamSpec(name, n, int.class);
 
-        // "boolean":
-        case "boolean":
+        // boolean
+        case TYPE_BOOL:
             if (value == null) {
                 throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
             }
@@ -301,6 +326,20 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
             boolean b = ((Literal) valueStmt.getObject()).getBoolean();
             return new ParamSpec(name, b, boolean.class);
         
+        // org.apache.lucene.analysis.Analyzer
+        case TYPE_ANALYZER:
+            if (valueStmt == null) {
+                throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
+            }
+            
+            RDFNode valueNode = valueStmt.getObject();
+            if (!valueNode.isResource()) {
+                throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
+            }
+            
+            Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
+            return new ParamSpec(name, analyzer, Analyzer.class);
+        
         default:
             // there was no match
             Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);


[03/18] jena git commit: implement GenericAnalyzerAssembler. TO DO: Tests

Posted by an...@apache.org.
implement GenericAnalyzerAssembler. TO DO: Tests

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/27ea30b7
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/27ea30b7
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/27ea30b7

Branch: refs/heads/master
Commit: 27ea30b73855d7a3cf0cd9561d2089295ec03353
Parents: 8b3757b
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Thu Apr 20 15:37:00 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Thu Apr 20 15:37:00 2017 -0500

----------------------------------------------------------------------
 .../assembler/GenericAnalyzerAssembler.java     | 332 +++++++++++++++++--
 .../query/text/assembler/TextAssembler.java     |   2 +-
 .../jena/query/text/assembler/TextVocab.java    |   8 +-
 3 files changed, 318 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/27ea30b7/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index db707d2..7fb04cc 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -18,10 +18,24 @@
 
 package org.apache.jena.query.text.assembler;
 
+import java.io.Reader;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.jena.assembler.Assembler;
 import org.apache.jena.assembler.Mode;
 import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.atlas.logging.Log ;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.rdf.model.Literal;
+import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 
 /**
  * Creates generic analyzers given a fully qualified Class name and a list
@@ -64,10 +78,29 @@ import org.apache.jena.rdf.model.Resource;
  * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.</li>
  * </ul>
  * <p>
- * A parameter of type <code>set</code> <i>may have</i> zero or more <code>text:paramValue</code>s.
+ * A parameter of type <code>set</code> <i>must have</i> a list of zero or more <code>String</code>s.
  * <p>
  * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
- * <code>int</code> <i>must have</i> a single <code>text:paramValue</code>
+ * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
+ * <p>
+ * Example:
+ * <pre>
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:GenericAnalyzer ;
+               text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
+               text:params (
+                    [ text:paramName "stopwords" ;
+                      text:paramType "set" ;
+                      text:paramValue ("the" "a" "an") ]
+                    [ text:paramName "stemExclusionSet" ;
+                      text:paramType "set" ;
+                      text:paramValue ("ing" "ed") ]
+                    )
+           ] .
+ * </pre>
  */
 public class GenericAnalyzerAssembler extends AssemblerBase {
     /*
@@ -77,29 +110,284 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
            text:analyzer [
                a text:GenericAnalyzer ;
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
-               text:params [
-                    a rdf:seq ;
-                    rdf:_1 [
-                        text:paramName "stopwords" ;
-                        text:paramType "set" ;
-                        text:paramValue "the", "a", "an" ] ;
-                    rdf:_2 [
-                        text:paramName "stemExclusionSet" ;
-                        text:paramType "set" ;
-                        text:paramValue "ing", "ed" ]
-                    ]
-                ]
-          ] .
+               text:params (
+                    [ text:paramName "stopwords" ;
+                      text:paramType "set" ;
+                      text:paramValue ("the" "a" "an") ]
+                    [ text:paramName "stemExclusionSet" ;
+                      text:paramType "set" ;
+                      text:paramValue ("ing" "ed") ]
+                    )
+           ] .
      */
 
-	public GenericAnalyzerAssembler() {
-		// TODO Auto-generated constructor stub
-	}
-
 	@Override
-	public Object open(Assembler a, Resource root, Mode mode) {
-		// TODO Auto-generated method stub
-		return null;
+	public Analyzer open(Assembler a, Resource root, Mode mode) {
+	    if (root.hasProperty(TextVocab.pClass)) {
+	        // text:class is expected to be a string literal
+	        String className = root.getProperty(TextVocab.pClass).getString();
+
+	        // is the class accessible?
+	        Class<?> clazz = null;
+	        try {
+	            clazz = Class.forName(className);
+	        } catch (ClassNotFoundException e) {
+	            Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
+	            return null;
+	        }
+
+	        // Is the class an Analyzer?
+	        if (!Analyzer.class.isAssignableFrom(clazz)) {
+	            Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
+	            return null;
+	        }
+	        
+	        if (root.hasProperty(TextVocab.pParams)) {
+	            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
+	            if (! node.isResource()) {
+	                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
+	            }
+
+	            List<ParamSpec> specs = getParamSpecs((Resource) node);
+
+	            // split the param specs into classes and values for constructor lookup
+	            final Class<?> paramClasses[] = new Class<?>[specs.size()];
+	            final Object paramValues[] = new Object[specs.size()];
+	            for (int i = 0; i < specs.size(); i++) {
+	                ParamSpec spec = specs.get(i);
+	                paramClasses[i] = spec.getValueClass();
+	                paramValues[i] = spec.getValue();
+	            }
+
+	            // Create new analyzer
+	            return newAnalyzer(clazz, paramClasses, paramValues);
+
+	        } else {
+	            // use the nullary Analyzer constructor
+	            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
+	        }
+	    } else {
+	        throw new TextIndexException("text:class property is required by GenericAnalyzer");
+	    }
 	}
 
+    /**
+     * Create an instance of the Lucene Analyzer class <code>clazz</code> with the provided parameters
+     *
+     * @param clazz The analyzer class
+     * @param paramClasses The parameter classes
+     * @param paramValues The parameter values
+     * @return The lucene analyzer, or <code>null</code> if no matching constructor is found or instantiation fails
+     */
+    private Analyzer newAnalyzer(Class<?> clazz, Class<?>[] paramClasses, Object[] paramValues) {
+
+        String className = clazz.getName();
+
+        try {
+            final Constructor<?> cstr = clazz.getDeclaredConstructor(paramClasses);
+
+            return (Analyzer) cstr.newInstance(paramValues);
+
+        } catch (IllegalArgumentException | IllegalAccessException | InstantiationException | InvocationTargetException | SecurityException e) {
+            Log.error(this, "Exception while instantiating analyzer class " + className + ". " + e.getMessage(), e);
+        } catch (NoSuchMethodException ex) {
+            Log.error(this, "Could not find matching analyzer class constructor for " + className + " " + ex.getMessage(), ex);
+        }
+
+        return null;
+    }
+    
+    private List<ParamSpec> getParamSpecs(Resource list) {
+        List<ParamSpec> result = new ArrayList<>();
+        Resource current = list;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("parameter list not well formed: " + current);
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isResource()) {
+                throw new TextIndexException("parameter specification must be an anon resource : " + first);
+            }
+
+            result.add(getParamSpec((Resource) first));
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("parameter list not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("parameter list node is not a resource : " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return result;
+    }
+    
+    private ParamSpec getParamSpec(Resource node) {
+        Statement nameStmt = node.getProperty(TextVocab.pParamName);
+        Statement typeStmt = node.getProperty(TextVocab.pParamType);
+        Statement valueStmt = node.getProperty(TextVocab.pParamValue);
+        
+        String name = getStringValue(nameStmt);
+        String type = getStringValue(typeStmt);
+        String value = getStringValue(valueStmt);
+
+        switch (type) {
+
+        // String
+        case "string": {
+            if (value == null) {
+                throw new TextIndexException("Value for string param: " + name + " must not be empty!");
+            }
+
+            return new ParamSpec(name, value, String.class);
+        }
+        
+        // "java.io.FileReader":
+        case "file": {
+
+            if (value == null) {
+                throw new TextIndexException("Value for file param must exist and must contain a file name.");
+            }
+
+            try {
+                // The analyzer is responsible for closing the file
+                Reader fileReader = new java.io.FileReader(value);
+                return new ParamSpec(name, fileReader, Reader.class);
+
+            } catch (java.io.FileNotFoundException ex) {
+                throw new TextIndexException("File " + value + " for param " + name + " not found!");
+            }
+        }
+        
+        // "org.apache.lucene.analysis.util.CharArraySet":
+        case "set": {
+            if (valueStmt == null) {
+                throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
+            }
+            
+            RDFNode valueNode = valueStmt.getObject();
+            if (!valueNode.isResource()) {
+                throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode);
+            }
+            
+            List<String> values = toStrings((Resource) valueNode);
+
+            return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
+        }
+        
+        // "int":
+        case "int":
+            if (value == null) {
+                throw new TextIndexException("Value for int param: " + name + " must not be empty!");
+            }
+
+            int n = ((Literal) valueStmt.getObject()).getInt();
+            return new ParamSpec(name, n, int.class);
+
+        // "boolean":
+        case "boolean":
+            if (value == null) {
+                throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
+            }
+
+            boolean b = ((Literal) valueStmt.getObject()).getBoolean();
+            return new ParamSpec(name, b, boolean.class);
+        
+        default:
+            // there was no match
+            Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
+            break;
+        }
+
+        return null;
+    }
+    
+    private String getStringValue(Statement stmt) {
+        if (stmt == null) {
+            return null;
+        } else {
+            RDFNode node = stmt.getObject();
+            if (node.isLiteral()) {
+                return ((Literal) node).getLexicalForm();
+            } else {
+                return null;
+            }
+        }
+    }
+
+    private List<String> toStrings(Resource list) {
+        List<String> result = new ArrayList<>();
+        Resource current = list;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("param spec of type set not well formed");
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isLiteral()) {
+                throw new TextIndexException("param spec of type set item is not a literal: " + first);
+            }
+            
+            result.add(((Literal)first).getLexicalForm());
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("param spec of type set not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return result;
+    }
+
+    /**
+     * <code>ParamSpec</code> contains the <code>name</code>, <code>Class</code>, and 
+     * <code>value</code> of a parameter for a constructor (or really any method in general)
+     */
+    private static final class ParamSpec {
+
+        private final String name;
+        private final Object value;
+        private final Class<?> clazz;
+
+        @SuppressWarnings("unused")
+        public ParamSpec(String key, Object value) {
+            this(key, value, value.getClass());
+        }
+
+        public ParamSpec(String key, Object value, Class<?> clazz) {
+            this.name = key;
+            this.value = value;
+            this.clazz = clazz;
+        }
+
+        @SuppressWarnings("unused")
+        public String getKey() {
+            return name;
+        }
+
+        public Object getValue() {
+            return value;
+        }
+
+        public Class<?> getValueClass() {
+            return clazz;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/27ea30b7/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 636c6bc..45f5cee 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -37,7 +37,7 @@ public class TextAssembler
         Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
-        Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.genericAnalyzer,  new GenericAnalyzerAssembler()) ;
 
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/27ea30b7/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index bc49d10..cd1844d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -66,7 +66,6 @@ public class TextVocab
     public static final Resource lowerCaseKeywordAnalyzer    = Vocab.resource(NS, "LowerCaseKeywordAnalyzer");
     public static final Resource localizedAnalyzer    = Vocab.resource(NS, "LocalizedAnalyzer");
     public static final Resource configurableAnalyzer = Vocab.resource(NS, "ConfigurableAnalyzer");
-    public static final Resource genericAnalyzer   = Vocab.resource(NS, "GenericAnalyzer");
     
     // Tokenizers
     public static final Resource standardTokenizer  = Vocab.resource(NS, "StandardTokenizer");
@@ -87,5 +86,12 @@ public class TextVocab
     public static final Property pReplicas          = Vocab.property(NS, "replicas");
     public static final Property pIndexName          = Vocab.property(NS, "indexName");
 
+    //GenericAnalyzer
+    public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
+    public static final Property pClass             = Vocab.property(NS, "class");
+    public static final Property pParams            = Vocab.property(NS, "params");
+    public static final Property pParamName         = Vocab.property(NS, "paramName");
+    public static final Property pParamType         = Vocab.property(NS, "paramType");
+    public static final Property pParamValue        = Vocab.property(NS, "paramValue");
 }
 


[17/18] jena git commit: added JenaSystem.init() to other jena-text tests

Posted by an...@apache.org.
added JenaSystem.init() to other jena-text tests

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/ece2f41d
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/ece2f41d
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/ece2f41d

Branch: refs/heads/master
Commit: ece2f41d8a381ee961dc61e8fafd66f5925bb0fa
Parents: 76c3ae3
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Jun 28 09:36:29 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Jun 28 09:36:29 2017 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/assembler/TestEntityMapAssembler.java   | 2 ++
 .../apache/jena/query/text/assembler/TestTextDatasetAssembler.java | 2 ++
 .../jena/query/text/assembler/TestTextIndexLuceneAssembler.java    | 2 ++
 3 files changed, 6 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/ece2f41d/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
index cdf5dcc..99a3e56 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
@@ -32,6 +32,7 @@ import org.apache.jena.query.text.TextIndexException ;
 import org.apache.jena.query.text.analyzer.ConfigurableAnalyzer ;
 import org.apache.jena.query.text.analyzer.LowerCaseKeywordAnalyzer ;
 import org.apache.jena.rdf.model.* ;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.vocabulary.RDF ;
 import org.apache.jena.vocabulary.RDFS ;
 import org.apache.lucene.analysis.core.KeywordAnalyzer ;
@@ -163,6 +164,7 @@ public class TestEntityMapAssembler {
     private static final Property SPEC2_PREDICATE1 = RDFS.label;
     private static final Property SPEC2_PREDICATE2 = RDFS.comment;
     static {
+        JenaSystem.init();
         TextAssembler.init();
         Model model = ModelFactory.createDefaultModel();
         

http://git-wip-us.apache.org/repos/asf/jena/blob/ece2f41d/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
index dbef4df..d6cb2ce 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java
@@ -33,6 +33,7 @@ import org.apache.jena.rdf.model.Resource ;
 import org.apache.jena.sparql.core.DatasetGraph ;
 import org.apache.jena.sparql.core.Quad ;
 import org.apache.jena.sparql.core.QuadAction ;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.tdb.assembler.AssemblerTDB ;
 import org.apache.jena.vocabulary.RDF ;
 import org.junit.Test ;
@@ -93,6 +94,7 @@ public class TestTextDatasetAssembler extends AbstractTestTextAssembler {
     }
 
     static {
+        JenaSystem.init();
         TextAssembler.init();
         AssemblerTDB.init();
         spec1 =

http://git-wip-us.apache.org/repos/asf/jena/blob/ece2f41d/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
index 6742661..53d2eaf 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java
@@ -21,6 +21,7 @@ package org.apache.jena.query.text.assembler;
 import org.apache.jena.assembler.Assembler ;
 import org.apache.jena.query.text.TextIndexLucene ;
 import org.apache.jena.rdf.model.Resource ;
+import org.apache.jena.system.JenaSystem;
 import org.apache.jena.vocabulary.RDFS ;
 import org.apache.lucene.analysis.core.KeywordAnalyzer ;
 import org.apache.lucene.store.RAMDirectory ;
@@ -99,6 +100,7 @@ public class TestTextIndexLuceneAssembler extends AbstractTestTextAssembler {
     }
 
     static {
+        JenaSystem.init();
         TextAssembler.init();
     }
 


[14/18] jena git commit: Merge branch 'master' 3.3.0+ of https://github.com/apache/jena into generic-text-analyzers

Posted by an...@apache.org.
Merge branch 'master' 3.3.0+ of https://github.com/apache/jena into generic-text-analyzers


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/6ac75fea
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/6ac75fea
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/6ac75fea

Branch: refs/heads/master
Commit: 6ac75fea9a36d7d195d7cf4d9d5d2b4505252932
Parents: fef4d22 f1f50bd
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Tue May 23 13:12:39 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Tue May 23 13:12:39 2017 -0500

----------------------------------------------------------------------
 apache-jena-libs/pom.xml                        |   8 +-
 apache-jena-osgi/jena-osgi-features/pom.xml     |   2 +-
 apache-jena-osgi/jena-osgi/pom.xml              |  14 +-
 apache-jena-osgi/pom.xml                        |   4 +-
 apache-jena/pom.xml                             |  34 +-
 jena-arq/pom.xml                                |  12 +-
 .../apache/jena/atlas/data/SortedDataBag.java   |   1 +
 .../jena/query/QueryExecutionFactory.java       |  96 +++-
 .../java/org/apache/jena/riot/RDFDataMgr.java   |  14 +-
 .../java/org/apache/jena/riot/RDFWriter.java    | 220 +++++++++
 .../org/apache/jena/riot/RDFWriterBuilder.java  | 165 +++++++
 .../jena/riot/adapters/AdapterFileManager.java  |   3 +-
 .../apache/jena/riot/adapters/AdapterLib.java   |  51 +--
 .../apache/jena/riot/lang/ReaderRIOTRDFXML.java |  59 ++-
 .../jena/sparql/algebra/TableFactory.java       |   7 +-
 .../java/org/apache/jena/sparql/core/Quad.java  |   3 +
 .../jena/sparql/engine/QueryIterator.java       |   6 +
 .../jena/sparql/engine/binding/BindingRoot.java |   2 -
 .../jena/sparql/engine/http/HttpQuery.java      |  33 +-
 .../sparql/engine/http/QueryEngineHTTP.java     |  35 +-
 .../sparql/engine/iterator/QueryIterRoot.java   |  20 +-
 .../sparql/engine/iterator/QueryIterSort.java   |  14 +-
 .../sparql/engine/iterator/QueryIterTopN.java   |  46 +-
 .../jena/sparql/engine/main/OpExecutor.java     |   6 +-
 .../sparql/engine/main/QueryEngineMain.java     |   6 +-
 .../engine/main/StageGeneratorGeneric.java      |  11 +-
 .../org/apache/jena/sparql/engine/ref/Eval.java |   4 +-
 .../jena/sparql/engine/ref/EvaluatorSimple.java |   4 +-
 .../jena/sparql/modify/UpdateProcessRemote.java |   5 +-
 .../sparql/modify/UpdateProcessRemoteForm.java  |   7 +-
 .../jena/sparql/path/eval/PathEngine.java       |   2 +-
 .../jena/sparql/pfunction/PFuncListAndList.java |  51 +++
 .../sparql/pfunction/PFuncListAndSimple.java    |  59 +++
 .../jena/sparql/pfunction/PFuncSimple.java      |   3 +-
 .../sparql/pfunction/PFuncSimpleAndList.java    |   4 +-
 .../sparql/pfunction/PropertyFunctionEval.java  |   8 +
 .../jena/update/UpdateExecutionFactory.java     | 114 ++++-
 .../org/apache/jena/riot/TS_RiotGeneral.java    |   5 +-
 .../org/apache/jena/riot/TestRDFWriter.java     |  88 ++++
 .../engine/iterator/TestQueryIterSort.java      |  91 +++-
 jena-base/pom.xml                               |   6 +-
 .../apache/jena/atlas/lib/ByteBufferLib.java    |   4 +-
 jena-cmds/pom.xml                               |  12 +-
 jena-core/pom.xml                               |  10 +-
 jena-csv/pom.xml                                |  10 +-
 jena-elephas/jena-elephas-common/pom.xml        |   2 +-
 jena-elephas/jena-elephas-io/pom.xml            |   2 +-
 jena-elephas/jena-elephas-mapreduce/pom.xml     |   2 +-
 jena-elephas/jena-elephas-stats/pom.xml         |   2 +-
 jena-elephas/pom.xml                            |   6 +-
 jena-extras/jena-querybuilder/pom.xml           |   2 +-
 jena-extras/pom.xml                             |   6 +-
 jena-fuseki1/pom.xml                            |  18 +-
 jena-fuseki2/apache-jena-fuseki/pom.xml         |   4 +-
 jena-fuseki2/jena-fuseki-core/pom.xml           |  14 +-
 .../jena/fuseki/servlets/ResponseResultSet.java | 151 +++----
 .../src/main/webapp/dataset.html                |   4 +-
 jena-fuseki2/jena-fuseki-embedded/pom.xml       |   4 +-
 jena-fuseki2/jena-fuseki-server/pom.xml         |   4 +-
 jena-fuseki2/jena-fuseki-war/pom.xml            |   2 +-
 jena-fuseki2/pom.xml                            |   4 +-
 jena-integration-tests/pom.xml                  |  18 +-
 .../rdfconnection/TestRDFConnectionRemote.java  |  20 +-
 jena-iri/pom.xml                                |   4 +-
 jena-jdbc/jena-jdbc-core/pom.xml                |   4 +-
 jena-jdbc/jena-jdbc-driver-bundle/pom.xml       |  20 +-
 jena-jdbc/jena-jdbc-driver-mem/pom.xml          |   6 +-
 jena-jdbc/jena-jdbc-driver-remote/pom.xml       |  10 +-
 jena-jdbc/jena-jdbc-driver-tdb/pom.xml          |  10 +-
 jena-jdbc/pom.xml                               |   4 +-
 jena-parent/pom.xml                             |   2 +-
 jena-permissions/pom.xml                        |   4 +-
 jena-rdfconnection/pom.xml                      |  12 +-
 .../org/apache/jena/rdfconnection/RDFConn.java  |  41 +-
 .../rdfconnection/RDFConnectionFactory.java     |   3 +-
 .../jena/rdfconnection/RDFConnectionRemote.java |  70 +--
 .../jena/rdfconnection/TS_RDFConnection.java    |   3 +-
 .../apache/jena/rdfconnection/TestRDFConn.java  | 127 ++++++
 jena-sdb/pom.xml                                |  14 +-
 .../apache/jena/sdb/engine/QueryEngineSDB.java  |   2 +-
 jena-shaded-guava/pom.xml                       |   4 +-
 jena-spatial/pom.xml                            |  10 +-
 .../jena/query/spatial/SpatialIndexContext.java |   4 +-
 .../assembler/SpatialIndexLuceneAssembler.java  |   1 +
 .../query/spatial/pfunction/SpatialMatch.java   |   6 +-
 .../pfunction/SpatialOperationPFBase.java       |   7 +-
 .../jena/query/spatial/SpatialSearchUtil.java   |   4 +-
 jena-tdb/pom.xml                                |  12 +-
 jena-text-es/.gitignore                         |   0
 jena-text-es/LICENSE                            | 202 +++++++++
 jena-text-es/NOTICE                             |   5 +
 jena-text-es/pom.xml                            | 226 ++++++++++
 .../main/java/examples/JenaESTextExample.java   |  95 ++++
 .../apache/jena/query/text/es/ESSettings.java   | 177 ++++++++
 .../jena/query/text/es/InitJenaTextES.java      |  34 ++
 .../query/text/es/TextESDatasetFactory.java     |  57 +++
 .../apache/jena/query/text/es/TextIndexES.java  | 451 +++++++++++++++++++
 .../text/es/assembler/TextIndexESAssembler.java | 117 +++++
 .../src/main/resources/META-INF/LICENSE         | 202 +++++++++
 jena-text-es/src/main/resources/META-INF/NOTICE |   5 +
 ...rg.apache.jena.system.JenaSubsystemLifecycle |   1 +
 jena-text-es/src/main/resources/data-es.ttl     |  46 ++
 .../org/apache/jena/query/text/properties.xml   |   9 +
 .../src/main/resources/text-config-es.ttl       |  64 +++
 .../jena/query/text/es/it/BaseESTest.java       | 111 +++++
 .../jena/query/text/es/it/TextIndexESIT.java    | 306 +++++++++++++
 .../src/test/resources/log4j.properties         |  10 +
 jena-text-es/testing/TextQuery/data.skos        |  36 ++
 jena-text-es/testing/TextQuery/data1.ttl        |  26 ++
 .../testing/TextQuery/text-config-union.ttl     |  44 ++
 jena-text-es/testing/TextQuery/text-config.ttl  |  43 ++
 jena-text-es/text-config.ttl                    |  62 +++
 jena-text/pom.xml                               | 107 +----
 .../main/java/examples/JenaESTextExample.java   |  99 ----
 .../jena/query/text/DatasetGraphText.java       |   2 -
 .../org/apache/jena/query/text/ESSettings.java  | 177 --------
 .../jena/query/text/TextDatasetFactory.java     |  25 -
 .../org/apache/jena/query/text/TextIndexES.java | 450 ------------------
 .../org/apache/jena/query/text/TextQueryPF.java |  20 +-
 .../query/text/assembler/TextAssembler.java     |   1 -
 .../text/assembler/TextIndexESAssembler.java    | 113 -----
 .../jena/query/text/assembler/TextVocab.java    |   2 -
 .../apache/jena/query/text/it/BaseESTest.java   | 111 -----
 .../jena/query/text/it/TextIndexESIT.java       | 306 -------------
 pom.xml                                         |  11 +-
 125 files changed, 3875 insertions(+), 1886 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/6ac75fea/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/jena/blob/6ac75fea/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------


[02/18] jena git commit: initial documentation

Posted by an...@apache.org.
initial documentation

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/8b3757ba
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/8b3757ba
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/8b3757ba

Branch: refs/heads/master
Commit: 8b3757bae52d08d4b308bd0f996ff452c60cc7c9
Parents: 1440e81
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Apr 19 14:43:04 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Apr 19 14:43:04 2017 -0500

----------------------------------------------------------------------
 .../assembler/GenericAnalyzerAssembler.java     | 85 ++++++++++++++++++++
 1 file changed, 85 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/8b3757ba/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 5c25cb2..db707d2 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.jena.query.text.assembler;
 
 import org.apache.jena.assembler.Assembler;
@@ -5,7 +23,74 @@ import org.apache.jena.assembler.Mode;
 import org.apache.jena.assembler.assemblers.AssemblerBase;
 import org.apache.jena.rdf.model.Resource;
 
+/**
+ * Creates generic analyzers given a fully qualified Class name and a list
+ * of parameters for a constructor of the Class.
+ * <p>
+ * The parameters may be of the following types:
+ * <pre>
+ *     string    String
+ *     set       org.apache.lucene.analysis.util.CharArraySet
+ *     file      java.io.FileReader
+ *     int       int
+ *     boolean   boolean
+ * </pre>
+ * 
+ * Although the list of types is not exhaustive it is a simple matter
+ * to create a wrapper Analyzer that reads a file with information that can
+ * be used to initialize any sort of parameters that may be needed for
+ * a given Analyzer. The provided types cover the vast majority of cases.
+ * <p>
+ * For example, <code>org.apache.lucene.analysis.ja.JapaneseAnalyzer</code>
+ * has a constructor with 4 parameters: a <code>UserDictionary</code>,
+ * a <code>JapaneseTokenizer.Mode</code>, a <code>CharArraySet</code>, and a 
+ * <code>Set&lt;String&gt;</code>. So a simple wrapper can extract the values
+ * needed for the various parameters with types not available in this
+ * extension, construct the required instances, and instantiate the
+ * <code>JapaneseAnalyzer</code>.
+ * <p>
+ * Adding custom Analyzers such as the above wrapper analyzer is a simple
+ * matter of adding the Analyzer class, plus any associated filters, tokenizers
+ * and so on, to the classpath for Jena - usually in a jar. Of course, all of 
+ * the Analyzers that are included in the Lucene distribution bundled with Jena
+ * are available as generic Analyzers as well.
+ * <p>
+ * Each parameter object is specified with:
+ * <ul>
+ * <li>an optional <code>text:paramName</code> that may be used to document which 
+ * parameter is represented</li>
+ * <li>a <code>text:paramType</code> which is one of: <code>string</code>, 
+ * <code>set</code>, <code>file</code>, <code>int</code>, <code>boolean</code>.</li>
+ * <li>a <code>text:paramValue</code> which is an xsd:string, xsd:boolean or xsd:int.</li>
+ * </ul>
+ * <p>
+ * A parameter of type <code>set</code> <i>may have</i> zero or more <code>text:paramValue</code>s.
+ * <p>
+ * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
+ * <code>int</code> <i>must have</i> a single <code>text:paramValue</code>.
+ */
 public class GenericAnalyzerAssembler extends AssemblerBase {
+    /*
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:GenericAnalyzer ;
+               text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
+               text:params [
+                    a rdf:Seq ;
+                    rdf:_1 [
+                        text:paramName "stopwords" ;
+                        text:paramType "set" ;
+                        text:paramValue "the", "a", "an" ] ;
+                    rdf:_2 [
+                        text:paramName "stemExclusionSet" ;
+                        text:paramType "set" ;
+                        text:paramValue "ing", "ed" ]
+                    ]
+                ]
+          ] ) .
+     */
 
 	public GenericAnalyzerAssembler() {
 		// TODO Auto-generated constructor stub


[11/18] jena git commit: factor DefinedAnalyzerAssembler and DefineAnalyzersAssembler into separate classes; move defined analyzer cache to Utils along side the language tagged analyzers since both caches have the same lifetime and similar uses.

Posted by an...@apache.org.
factor DefinedAnalyzerAssembler and DefineAnalyzersAssembler into
separate classes; move defined analyzer cache to Utils along side the
language tagged analyzers since both caches have the same lifetime and
similar uses.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/5edb6c87
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/5edb6c87
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/5edb6c87

Branch: refs/heads/master
Commit: 5edb6c8758124fe8dd5a96d7b92949fc3ac1f61f
Parents: 311efab
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 10:13:09 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 10:13:09 2017 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/analyzer/Util.java   |  12 +++
 .../assembler/DefineAnalyzersAssembler.java     | 105 +++++++++++++++++++
 .../assembler/DefinedAnalyzerAssembler.java     | 103 +++++-------------
 .../assembler/TextIndexLuceneAssembler.java     |  10 +-
 4 files changed, 152 insertions(+), 78 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 20c7573..6ad0747 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -18,6 +18,7 @@
 
 package org.apache.jena.query.text.analyzer;
 
+import org.apache.jena.rdf.model.Resource;
 import org.apache.lucene.analysis.Analyzer;
 import java.lang.reflect.Constructor;
 import java.util.Hashtable;
@@ -26,6 +27,9 @@ public class Util {
 
     private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
     private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+    
+    // cache of defined text:defineAnalyzers
+    private static Hashtable<String, Analyzer> definedAnalyzers = new Hashtable<>();
 
     static {
         initAnalyzerDefs();
@@ -55,6 +59,14 @@ public class Util {
     public static void addAnalyzer(String lang, Analyzer analyzer) {
         cache.put(lang, analyzer);
     }
+    
+    public static Analyzer getDefinedAnalyzer(Resource key) {
+        return key.getURI() == null ? null : definedAnalyzers.get(key.getURI()); // blank node: no URI, and Hashtable.get(null) would throw NPE
+    }
+    
+    public static void defineAnalyzer(Resource key, Analyzer analyzer) {
+        definedAnalyzers.put(key.getURI(), analyzer); // Hashtable: the URI and the analyzer must both be non-null
+    }
 
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();

http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
new file mode 100644
index 0000000..11270e2
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefineAnalyzersAssembler {
+    /*
+    <#indexLucene> a text:TextIndexLucene ;
+        text:directory <file:Lucene> ;
+        text:entityMap <#entMap> ;
+        text:defineAnalyzers (
+            [text:addLang "sa-x-iast" ;
+             text:analyzer [ . . . ]]
+            [text:defineAnalyzer <#foo> ;
+             text:analyzer [ . . . ]]
+        )
+    */
+
+    /**
+     * Registers every analyzer in a text:defineAnalyzers list with {@link Util}.
+     * @param a assembler used to open each entry's text:analyzer resource
+     * @param list head of the RDF list of entries; null or rdf:nil means no entries
+     * @return true if at least one entry added an analyzer for a language tag
+     * @throws TextIndexException if the list or any of its entries is malformed
+     */
+    public static boolean open(Assembler a, Resource list) {
+        Resource current = list;
+        boolean isMultilingualSupport = false;
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("parameter list not well formed: " + current);
+            }
+            if (! firstStmt.getObject().isResource()) {
+                throw new TextIndexException("parameter specification must be an anon resource : " + firstStmt.getObject());
+            }
+            // process the current list element to add an analyzer
+            Resource adding = (Resource) firstStmt.getObject();
+            Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
+            if (analyzerStmt == null) {
+                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+            }
+            RDFNode analyzerNode = analyzerStmt.getObject();
+            if (!analyzerNode.isResource()) {
+                throw new TextIndexException("text:defineAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+            }
+            // Util keeps analyzers in Hashtables, which reject null values: report a null result here
+            Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
+            if (analyzer == null) {
+                throw new TextIndexException("text:defineAnalyzers text:analyzer did not produce an analyzer: " + analyzerNode);
+            }
+
+            Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+            if (langStmt != null) {
+                if (! langStmt.getObject().isLiteral()) {
+                    throw new TextIndexException("text:defineAnalyzers text:addLang property must be a string: " + adding);
+                }
+                Util.addAnalyzer(langStmt.getString(), analyzer);
+                isMultilingualSupport = true;
+            }
+
+            Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
+            if (defStmt != null) {
+                if (! defStmt.getObject().isURIResource()) {
+                    throw new TextIndexException("text:defineAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+                }
+                Util.defineAnalyzer(defStmt.getResource(), analyzer);
+            }
+
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("parameter list not terminated by rdf:nil");
+            }
+            if (! restStmt.getObject().isResource()) {
+                throw new TextIndexException("parameter list node is not a resource : " + restStmt.getObject());
+            }
+            current = (Resource) restStmt.getObject();
+        }
+        return isMultilingualSupport;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
index e7bd941..e6909ac 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -1,88 +1,39 @@
-package org.apache.jena.query.text.assembler;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
-import java.util.Hashtable;
+package org.apache.jena.query.text.assembler;
 
 import org.apache.jena.assembler.Assembler;
 import org.apache.jena.assembler.Mode;
 import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.query.text.TextIndexException;
 import org.apache.jena.query.text.analyzer.Util;
-import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.Statement;
-import org.apache.jena.vocabulary.RDF;
-import org.apache.lucene.analysis.Analyzer;
 
 public class DefinedAnalyzerAssembler extends AssemblerBase {
-    
-    private static Hashtable<Resource, Analyzer> analyzers = new Hashtable<>();
-    
-    public static void addAnalyzer(Resource key, Analyzer analyzer) {
-        analyzers.put(key, analyzer);
-    }
-    
-    public static boolean addAnalyzers(Assembler a, Resource list) {
-        Resource current = list;
-        boolean isMultilingualSupport = false;
-        
-        while (current != null && ! current.equals(RDF.nil)){
-            Statement firstStmt = current.getProperty(RDF.first);
-            if (firstStmt == null) {
-                throw new TextIndexException("parameter list not well formed: " + current);
-            }
-            
-            RDFNode first = firstStmt.getObject();
-            if (! first.isResource()) {
-                throw new TextIndexException("parameter specification must be an anon resource : " + first);
-            }
-
-            // process the current list element to add an analyzer 
-            Resource adding = (Resource) first;
-            if (adding.hasProperty(TextVocab.pAnalyzer)) {
-                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
-                RDFNode analyzerNode = analyzerStmt.getObject();
-                if (!analyzerNode.isResource()) {
-                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
-                }
-                
-                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
-                
-                if (adding.hasProperty(TextVocab.pAddLang)) {
-                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
-                    String langCode = langStmt.getString();
-                    Util.addAnalyzer(langCode, analyzer);
-                    isMultilingualSupport = true;
-                }
-                
-                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
-                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
-                    Resource id = defStmt.getResource();
-                    
-                    if (id.getURI() != null) {
-                        DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
-                    } else {
-                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
-                    }
-                }
-            } else {
-                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
-            }
-            
-            Statement restStmt = current.getProperty(RDF.rest);
-            if (restStmt == null) {
-                throw new TextIndexException("parameter list not terminated by rdf:nil");
-            }
-            
-            RDFNode rest = restStmt.getObject();
-            if (! rest.isResource()) {
-                throw new TextIndexException("parameter list node is not a resource : " + rest);
-            }
-            
-            current = (Resource) rest;
-        }
-        
-        return isMultilingualSupport;
-    }
+    /*
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:DefinedAnalyzer ;
+               text:useAnalyzer <#Foo> ]
+     */
    
     @Override
     public Object open(Assembler a, Resource root, Mode mode) {
@@ -91,7 +42,7 @@ public class DefinedAnalyzerAssembler extends AssemblerBase {
             Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
             Resource key = useStmt.getResource();
             
-            return analyzers.get(key);
+            return Util.getDefinedAnalyzer(key);
         }
         
         return null;

http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 7acfb9e..14af9bf 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -26,6 +26,7 @@ import org.apache.jena.assembler.Mode ;
 import org.apache.jena.assembler.assemblers.AssemblerBase ;
 import org.apache.jena.atlas.io.IO ;
 import org.apache.jena.atlas.lib.IRILib ;
+import org.apache.jena.atlas.logging.Log;
 import org.apache.jena.query.text.*;
 import org.apache.jena.rdf.model.RDFNode ;
 import org.apache.jena.rdf.model.Resource ;
@@ -120,10 +121,15 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
             if (null != defAnalyzersStatement) {
                 RDFNode aNode = defAnalyzersStatement.getObject();
                 if (! aNode.isResource()) {
-                    throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+                    throw new TextIndexException("text:defineAnalyzers property is not a resource (list) : " + aNode);
                 }
-                boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+                boolean addedLangs = DefineAnalyzersAssembler.open(a, (Resource) aNode);
+                // if the text:defineAnalyzers added any analyzers to lang tags then ensure that
+                // multilingual support is enabled
                 if (addedLangs) {
+                    if (!isMultilingualSupport) {
+                        Log.warn(this,  "Multilingual support implicitly enabled by text:defineAnalyzers");
+                    }
                     isMultilingualSupport = true;
                 }
             }