You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:19 UTC
[10/18] jena git commit: represent parameter types as resources like text:TypeSet instead of literal string

Represent parameter types as resources such as text:TypeSet instead of
literal strings

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/311efab2
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/311efab2
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/311efab2

Branch: refs/heads/master
Commit: 311efab2fd26a58406b29b64d74b41039292d080
Parents: a3bb8e41
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 09:18:35 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 09:18:35 2017 -0500

----------------------------------------------------------------------
 .../assembler/GenericAnalyzerAssembler.java     | 208 ++++++++++---------
 .../jena/query/text/assembler/TextVocab.java    |   6 +
 .../assembler/TestGenericAnalyzerAssembler.java |  29 +--
 3 files changed, 125 insertions(+), 118 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 853fcb6..4f10b85 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -43,12 +43,12 @@ import org.apache.lucene.analysis.CharArraySet;
  * <p>
  * The parameters may be of the following types:
  * <pre>
- *     string    String
- *     set       org.apache.lucene.analysis.util.CharArraySet
- *     file      java.io.FileReader
- *     int       int
- *     boolean   boolean
- *     analyzer  org.apache.lucene.analysis.Analyzer
+ *     text:TypeString    String
+ *     text:TypeSet       org.apache.lucene.analysis.CharArraySet
+ *     text:TypeFile      java.io.FileReader
+ *     text:TypeInt       int
+ *     text:TypeBoolean   boolean
+ *     text:TypeAnalyzer  org.apache.lucene.analysis.Analyzer
  * </pre>
  * 
  * Although the list of types is not exhaustive it is a simple matter
@@ -74,15 +74,18 @@ import org.apache.lucene.analysis.CharArraySet;
  * <ul>
  * <li>an optional <code>text:paramName</code> that may be used to document which 
  * parameter is represented</li>
- * <li>a <code>text:paramType</code> which is one of: <code>string</code>, 
- * <code>set</code>, <code>file</code>, <code>int</code>, <code>boolean</code>.</li>
- * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.</li>
+ * <li>a <code>text:paramType</code> which is one of: <code>text:TypeString</code>, 
+ * <code>text:TypeSet</code>, <code>text:TypeFile</code>, <code>text:TypeInt</code>, 
+ * <code>text:TypeBoolean</code>, <code>text:TypeAnalyzer</code>.</li>
+ * <li>a <code>text:paramValue</code> which is an xsd:string, xsd:boolean, xsd:int or a resource.</li>
  * </ul>
  * <p>
- * A parameter of type <code>set</code> <i>must have</i> a list of zero or more <code>String</code>s.
+ * A parameter of type <code>text:TypeSet</code> <i>must have</i> a list of zero or 
+ * more <code>String</code>s.
  * <p>
- * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or 
- * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
+ * A parameter of type <code>text:TypeString</code>, <code>text:TypeFile</code>, 
+ * <code>text:TypeBoolean</code>, <code>text:TypeInt</code> or <code>text:TypeAnalyzer</code> 
+ * <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
  * <p>
  * Examples:
  * <pre>
@@ -94,10 +97,10 @@ import org.apache.lucene.analysis.CharArraySet;
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                text:params (
                     [ text:paramName "stopwords" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("the" "a" "an") ]
                     [ text:paramName "stemExclusionSet" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("ing" "ed") ]
                     )
            ] .
@@ -111,10 +114,10 @@ import org.apache.lucene.analysis.CharArraySet;
                text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
                text:params (
                     [ text:paramName "defaultAnalyzer" ;
-                      text:paramType "analyzer" ;
+                      text:paramType text:TypeAnalyzer ;
                       text:paramValue [ a text:SimpleAnalyzer ] ]
                     [ text:paramName "maxShingleSize" ;
-                      text:paramType "int" ;
+                      text:paramType text:TypeInt ;
                       text:paramValue 3 ]
                     )
            ] .
@@ -130,71 +133,71 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
                text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
                text:params (
                     [ text:paramName "stopwords" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("the" "a" "an") ]
                     [ text:paramName "stemExclusionSet" ;
-                      text:paramType "set" ;
+                      text:paramType text:TypeSet ;
                       text:paramValue ("ing" "ed") ]
                     )
            ] .
      */
 
-    public static final String TYPE_ANALYZER = "analyzer";
-    public static final String TYPE_BOOL = "boolean";
-    public static final String TYPE_FILE = "file";
-    public static final String TYPE_INT = "int";
-    public static final String TYPE_SET = "set";
-    public static final String TYPE_STRING = "string";
+    public static final String TYPE_ANALYZER   = "TypeAnalyzer";
+    public static final String TYPE_BOOL       = "TypeBoolean";
+    public static final String TYPE_FILE       = "TypeFile";
+    public static final String TYPE_INT        = "TypeInt";
+    public static final String TYPE_SET        = "TypeSet";
+    public static final String TYPE_STRING     = "TypeString";
 
     @Override
-	public Analyzer open(Assembler a, Resource root, Mode mode) {
-	    if (root.hasProperty(TextVocab.pClass)) {
-	        // text:class is expected to be a string literal
-	        String className = root.getProperty(TextVocab.pClass).getString();
-
-	        // is the class accessible?
-	        Class<?> clazz = null;
-	        try {
-	            clazz = Class.forName(className);
-	        } catch (ClassNotFoundException e) {
-	            Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
-	            return null;
-	        }
-
-	        // Is the class an Analyzer?
-	        if (!Analyzer.class.isAssignableFrom(clazz)) {
-	            Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
-	            return null;
-	        }
-	        
-	        if (root.hasProperty(TextVocab.pParams)) {
-	            RDFNode node = root.getProperty(TextVocab.pParams).getObject();
-	            if (! node.isResource()) {
-	                throw new TextIndexException("text:params must be a list of parameter resources: " + node);
-	            }
-
-	            List<ParamSpec> specs = getParamSpecs((Resource) node);
-
-	            // split the param specs into classes and values for constructor lookup
-	            final Class<?> paramClasses[] = new Class<?>[specs.size()];
-	            final Object paramValues[] = new Object[specs.size()];
-	            for (int i = 0; i < specs.size(); i++) {
-	                ParamSpec spec = specs.get(i);
-	                paramClasses[i] = spec.getValueClass();
-	                paramValues[i] = spec.getValue();
-	            }
-
-	            // Create new analyzer
-	            return newAnalyzer(clazz, paramClasses, paramValues);
-
-	        } else {
-	            // use the nullary Analyzer constructor
-	            return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
-	        }
-	    } else {
-	        throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
-	    }
-	}
+    public Analyzer open(Assembler a, Resource root, Mode mode) {
+        if (root.hasProperty(TextVocab.pClass)) {
+            // text:class is expected to be a string literal
+            String className = root.getProperty(TextVocab.pClass).getString();
+
+            // is the class accessible?
+            Class<?> clazz = null;
+            try {
+                clazz = Class.forName(className);
+            } catch (ClassNotFoundException e) {
+                Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
+                return null;
+            }
+
+            // Is the class an Analyzer?
+            if (!Analyzer.class.isAssignableFrom(clazz)) {
+                Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
+                return null;
+            }
+
+            if (root.hasProperty(TextVocab.pParams)) {
+                RDFNode node = root.getProperty(TextVocab.pParams).getObject();
+                if (! node.isResource()) {
+                    throw new TextIndexException("text:params must be a list of parameter resources: " + node);
+                }
+
+                List<ParamSpec> specs = getParamSpecs((Resource) node);
+
+                // split the param specs into classes and values for constructor lookup
+                final Class<?> paramClasses[] = new Class<?>[specs.size()];
+                final Object paramValues[] = new Object[specs.size()];
+                for (int i = 0; i < specs.size(); i++) {
+                    ParamSpec spec = specs.get(i);
+                    paramClasses[i] = spec.getValueClass();
+                    paramValues[i] = spec.getValue();
+                }
+
+                // Create new analyzer
+                return newAnalyzer(clazz, paramClasses, paramValues);
+
+            } else {
+                // use the nullary Analyzer constructor
+                return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
+            }
+        } else {
+            throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
+        }
+    }
 
     /**
      * Create instance of the Lucene Analyzer, <code>class</code>, with provided parameters
@@ -221,47 +224,52 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
         return null;
     }
-    
+
     private List<ParamSpec> getParamSpecs(Resource list) {
         List<ParamSpec> result = new ArrayList<>();
         Resource current = list;
-        
+
         while (current != null && ! current.equals(RDF.nil)){
             Statement firstStmt = current.getProperty(RDF.first);
             if (firstStmt == null) {
                 throw new TextIndexException("parameter list not well formed: " + current);
             }
-            
+
             RDFNode first = firstStmt.getObject();
             if (! first.isResource()) {
                 throw new TextIndexException("parameter specification must be an anon resource : " + first);
             }
 
             result.add(getParamSpec((Resource) first));
-            
+
             Statement restStmt = current.getProperty(RDF.rest);
             if (restStmt == null) {
                 throw new TextIndexException("parameter list not terminated by rdf:nil");
             }
-            
+
             RDFNode rest = restStmt.getObject();
             if (! rest.isResource()) {
                 throw new TextIndexException("parameter list node is not a resource : " + rest);
             }
-            
+
             current = (Resource) rest;
         }
-        
+
         return result;
     }
-    
+
     private ParamSpec getParamSpec(Resource node) {
         Statement nameStmt = node.getProperty(TextVocab.pParamName);
         Statement typeStmt = node.getProperty(TextVocab.pParamType);
         Statement valueStmt = node.getProperty(TextVocab.pParamValue);
         
+        if (typeStmt == null) {
+            throw new TextIndexException("Parameter specification must have a text:paramType: " + node);
+        }        
+        Resource typeRes = typeStmt.getResource();
+        String type = typeRes.getLocalName();
+
         String name = getStringValue(nameStmt);
-        String type = getStringValue(typeStmt);
         String value = getStringValue(valueStmt);
 
         switch (type) {
@@ -274,7 +282,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
             return new ParamSpec(name, value, String.class);
         }
-        
+
         // java.io.FileReader
         case TYPE_FILE: {
 
@@ -291,23 +299,23 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
                 throw new TextIndexException("File " + value + " for param " + name + " not found!");
             }
         }
-        
+
         // org.apache.lucene.analysis.util.CharArraySet
         case TYPE_SET: {
             if (valueStmt == null) {
                 throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
             }
-            
+
             RDFNode valueNode = valueStmt.getObject();
             if (!valueNode.isResource()) {
                 throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode);
             }
-            
+
             List<String> values = toStrings((Resource) valueNode);
 
             return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
         }
-        
+
         // int
         case TYPE_INT:
             if (value == null) {
@@ -317,7 +325,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
             int n = ((Literal) valueStmt.getObject()).getInt();
             return new ParamSpec(name, n, int.class);
 
-        // boolean
+            // boolean
         case TYPE_BOOL:
             if (value == null) {
                 throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
@@ -325,21 +333,21 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
             boolean b = ((Literal) valueStmt.getObject()).getBoolean();
             return new ParamSpec(name, b, boolean.class);
-        
-        // org.apache.lucene.analysis.Analyzer
+
+            // org.apache.lucene.analysis.Analyzer
         case TYPE_ANALYZER:
             if (valueStmt == null) {
                 throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
             }
-            
+
             RDFNode valueNode = valueStmt.getObject();
             if (!valueNode.isResource()) {
                 throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
             }
-            
+
             Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
             return new ParamSpec(name, analyzer, Analyzer.class);
-        
+
         default:
             // there was no match
             Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
@@ -348,7 +356,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
 
         return null;
     }
-    
+
     private String getStringValue(Statement stmt) {
         if (stmt == null) {
             return null;
@@ -365,33 +373,33 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
     private List<String> toStrings(Resource list) {
         List<String> result = new ArrayList<>();
         Resource current = list;
-        
+
         while (current != null && ! current.equals(RDF.nil)){
             Statement firstStmt = current.getProperty(RDF.first);
             if (firstStmt == null) {
                 throw new TextIndexException("param spec of type set not well formed");
             }
-            
+
             RDFNode first = firstStmt.getObject();
             if (! first.isLiteral()) {
                 throw new TextIndexException("param spec of type set item is not a literal: " + first);
             }
-            
+
             result.add(((Literal)first).getLexicalForm());
-            
+
             Statement restStmt = current.getProperty(RDF.rest);
             if (restStmt == null) {
                 throw new TextIndexException("param spec of type set not terminated by rdf:nil");
             }
-            
+
             RDFNode rest = restStmt.getObject();
             if (! rest.isResource()) {
                 throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
             }
-            
+
             current = (Resource) rest;
         }
-        
+
         return result;
     }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index b051252..78cf0c0 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -89,6 +89,12 @@ public class TextVocab
     //GenericAnalyzer
     public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
     public static final Resource definedAnalyzer    = Vocab.resource(NS, "DefinedAnalyzer");
+    public static final Resource typeAnalyzer       = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_ANALYZER);
+    public static final Resource typeBoolean        = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_BOOL);
+    public static final Resource typeFile           = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_FILE);
+    public static final Resource typeInt            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_INT);
+    public static final Resource typeSet            = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_SET);
+    public static final Resource typeString         = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_STRING);
     public static final Property pClass             = Vocab.property(NS, "class");
     public static final Property pParams            = Vocab.property(NS, "params");
     public static final Property pParamName         = Vocab.property(NS, "paramName");

http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 87c5d75..3effc39 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -67,13 +67,6 @@ public class TestGenericAnalyzerAssembler {
     
     private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
-    private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
-    private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
-    private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
-    private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
-    private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
-    private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
-    
     static {
         TextAssembler.init();
         Model model = ModelFactory.createDefaultModel();
@@ -106,7 +99,7 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "stopWords")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_SET)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeSet)
                                                   .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
                                           }))
                      ;
@@ -121,14 +114,14 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "defaultAnalyzer")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                   .addProperty(TextVocab.pParamValue, 
                                                                model.createResource()
                                                                .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                ),
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "maxShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 3)
                                           }))
                      ;
@@ -143,34 +136,34 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "delegate")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
                                                   .addProperty(TextVocab.pParamValue, 
                                                                model.createResource()
                                                                .addProperty(RDF.type, TextVocab.simpleAnalyzer)
                                                                ) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "minShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 2) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "maxShingleSize")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeInt)
                                                   .addLiteral(TextVocab.pParamValue, 4) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "tokenSeparator")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                   .addLiteral(TextVocab.pParamValue, "|") ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "outputUnigrams")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                   .addLiteral(TextVocab.pParamValue, false) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
                                                   .addLiteral(TextVocab.pParamValue, true) ,
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "fillerToken")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeString)
                                                   .addLiteral(TextVocab.pParamValue, "foo")
                                           }))
                      ;
@@ -185,7 +178,7 @@ public class TestGenericAnalyzerAssembler {
                                           new RDFNode[] { 
                                                   model.createResource()
                                                   .addProperty(TextVocab.pParamName, "stopWords")
-                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+                                                  .addProperty(TextVocab.pParamType, TextVocab.typeFile)
                                                   .addProperty(TextVocab.pParamValue, FILE_STOPS)
                                           }))
                      ;