You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:19 UTC
[10/18] jena git commit: represent parameter types as resources like
text:TypeSet instead of literal string
represent parameter types as resources like text:TypeSet instead of
literal string
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/311efab2
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/311efab2
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/311efab2
Branch: refs/heads/master
Commit: 311efab2fd26a58406b29b64d74b41039292d080
Parents: a3bb8e41
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 09:18:35 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 09:18:35 2017 -0500
----------------------------------------------------------------------
.../assembler/GenericAnalyzerAssembler.java | 208 ++++++++++---------
.../jena/query/text/assembler/TextVocab.java | 6 +
.../assembler/TestGenericAnalyzerAssembler.java | 29 +--
3 files changed, 125 insertions(+), 118 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 853fcb6..4f10b85 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -43,12 +43,12 @@ import org.apache.lucene.analysis.CharArraySet;
* <p>
* The parameters may be of the following types:
* <pre>
- * string String
- * set org.apache.lucene.analysis.util.CharArraySet
- * file java.io.FileReader
- * int int
- * boolean boolean
- * analyzer org.apache.lucene.analysis.Analyzer
+ * text:TypeString String
+ * text:TypeSet org.apache.lucene.analysis.CharArraySet
+ * text:TypeFile java.io.FileReader
+ * text:TypeInt int
+ * text:TypeBoolean boolean
+ * text:TypeAnalyzer org.apache.lucene.analysis.Analyzer
* </pre>
*
* Although the list of types is not exhaustive it is a simple matter
@@ -74,15 +74,18 @@ import org.apache.lucene.analysis.CharArraySet;
* <ul>
* <li>an optional <code>text:paramName</code> that may be used to document which
* parameter is represented</li>
- * <li>a <code>text:paramType</code> which is one of: <code>string</code>,
- * <code>set</code>, <code>file</code>, <code>int</code>, <code>boolean</code>.</li>
- * <li>a text:paramValue which is an xsd:string, xsd:boolean or xsd:int.</li>
+ * <li>a <code>text:paramType</code> which is one of: <code>text:TypeString</code>,
+ * <code>text:TypeSet</code>, <code>text:TypeFile</code>, <code>text:TypeInt</code>,
+ * <code>text:TypeBoolean</code>, <code>text:TypeAnalyzer</code>.</li>
+ * <li>a <code>text:paramValue</code> which is an xsd:string, xsd:boolean or xsd:int literal, or a resource.</li>
* </ul>
* <p>
- * A parameter of type <code>set</code> <i>must have</i> a list of zero or more <code>String</code>s.
+ * A parameter of type <code>text:TypeSet</code> <i>must have</i> a <code>text:paramValue</code>
+ * that is a list of zero or more <code>String</code>s.
* <p>
- * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or
- * <code>int</code> <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
+ * A parameter of type <code>text:TypeString</code>, <code>text:TypeFile</code>,
+ * <code>text:TypeBoolean</code>, <code>text:TypeInt</code> or <code>text:TypeAnalyzer</code>
+ * <i>must have</i> a single <code>text:paramValue</code> of the appropriate type.
* <p>
* Examples:
* <pre>
@@ -94,10 +97,10 @@ import org.apache.lucene.analysis.CharArraySet;
text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
text:params (
[ text:paramName "stopwords" ;
- text:paramType "set" ;
+ text:paramType text:TypeSet ;
text:paramValue ("the" "a" "an") ]
[ text:paramName "stemExclusionSet" ;
- text:paramType "set" ;
+ text:paramType text:TypeSet ;
text:paramValue ("ing" "ed") ]
)
] .
@@ -111,10 +114,10 @@ import org.apache.lucene.analysis.CharArraySet;
text:class "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper" ;
text:params (
[ text:paramName "defaultAnalyzer" ;
- text:paramType "analyzer" ;
+ text:paramType text:TypeAnalyzer ;
text:paramValue [ a text:SimpleAnalyzer ] ]
[ text:paramName "maxShingleSize" ;
- text:paramType "int" ;
+ text:paramType text:TypeInt ;
text:paramValue 3 ]
)
] .
@@ -130,71 +133,71 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
text:params (
[ text:paramName "stopwords" ;
- text:paramType "set" ;
+ text:paramType text:TypeSet ;
text:paramValue ("the" "a" "an") ]
[ text:paramName "stemExclusionSet" ;
- text:paramType "set" ;
+ text:paramType text:TypeSet ;
text:paramValue ("ing" "ed") ]
)
] .
*/
- public static final String TYPE_ANALYZER = "analyzer";
- public static final String TYPE_BOOL = "boolean";
- public static final String TYPE_FILE = "file";
- public static final String TYPE_INT = "int";
- public static final String TYPE_SET = "set";
- public static final String TYPE_STRING = "string";
+ public static final String TYPE_ANALYZER = "TypeAnalyzer";
+ public static final String TYPE_BOOL = "TypeBoolean";
+ public static final String TYPE_FILE = "TypeFile";
+ public static final String TYPE_INT = "TypeInt";
+ public static final String TYPE_SET = "TypeSet";
+ public static final String TYPE_STRING = "TypeString";
@Override
- public Analyzer open(Assembler a, Resource root, Mode mode) {
- if (root.hasProperty(TextVocab.pClass)) {
- // text:class is expected to be a string literal
- String className = root.getProperty(TextVocab.pClass).getString();
-
- // is the class accessible?
- Class<?> clazz = null;
- try {
- clazz = Class.forName(className);
- } catch (ClassNotFoundException e) {
- Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
- return null;
- }
-
- // Is the class an Analyzer?
- if (!Analyzer.class.isAssignableFrom(clazz)) {
- Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
- return null;
- }
-
- if (root.hasProperty(TextVocab.pParams)) {
- RDFNode node = root.getProperty(TextVocab.pParams).getObject();
- if (! node.isResource()) {
- throw new TextIndexException("text:params must be a list of parameter resources: " + node);
- }
-
- List<ParamSpec> specs = getParamSpecs((Resource) node);
-
- // split the param specs into classes and values for constructor lookup
- final Class<?> paramClasses[] = new Class<?>[specs.size()];
- final Object paramValues[] = new Object[specs.size()];
- for (int i = 0; i < specs.size(); i++) {
- ParamSpec spec = specs.get(i);
- paramClasses[i] = spec.getValueClass();
- paramValues[i] = spec.getValue();
- }
-
- // Create new analyzer
- return newAnalyzer(clazz, paramClasses, paramValues);
-
- } else {
- // use the nullary Analyzer constructor
- return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
- }
- } else {
- throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
- }
- }
+ public Analyzer open(Assembler a, Resource root, Mode mode) {
+ if (root.hasProperty(TextVocab.pClass)) {
+ // text:class is expected to be a string literal
+ String className = root.getProperty(TextVocab.pClass).getString();
+
+ // is the class accessible?
+ Class<?> clazz = null;
+ try {
+ clazz = Class.forName(className);
+ } catch (ClassNotFoundException e) {
+ Log.error(this, "Analyzer class " + className + " not found. " + e.getMessage(), e);
+ return null;
+ }
+
+ // Is the class an Analyzer?
+ if (!Analyzer.class.isAssignableFrom(clazz)) {
+ Log.error(this, clazz.getName() + " has to be a subclass of " + Analyzer.class.getName());
+ return null;
+ }
+
+ if (root.hasProperty(TextVocab.pParams)) {
+ RDFNode node = root.getProperty(TextVocab.pParams).getObject();
+ if (! node.isResource()) {
+ throw new TextIndexException("text:params must be a list of parameter resources: " + node);
+ }
+
+ List<ParamSpec> specs = getParamSpecs((Resource) node);
+
+ // split the param specs into classes and values for constructor lookup
+ final Class<?> paramClasses[] = new Class<?>[specs.size()];
+ final Object paramValues[] = new Object[specs.size()];
+ for (int i = 0; i < specs.size(); i++) {
+ ParamSpec spec = specs.get(i);
+ paramClasses[i] = spec.getValueClass();
+ paramValues[i] = spec.getValue();
+ }
+
+ // Create new analyzer
+ return newAnalyzer(clazz, paramClasses, paramValues);
+
+ } else {
+ // use the nullary Analyzer constructor
+ return newAnalyzer(clazz, new Class<?>[0], new Object[0]);
+ }
+ } else {
+ throw new TextIndexException("text:class property is required by GenericAnalyzer: " + root);
+ }
+ }
/**
* Create instance of the Lucene Analyzer, <code>class</code>, with provided parameters
@@ -221,47 +224,52 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
return null;
}
-
+
private List<ParamSpec> getParamSpecs(Resource list) {
List<ParamSpec> result = new ArrayList<>();
Resource current = list;
-
+
while (current != null && ! current.equals(RDF.nil)){
Statement firstStmt = current.getProperty(RDF.first);
if (firstStmt == null) {
throw new TextIndexException("parameter list not well formed: " + current);
}
-
+
RDFNode first = firstStmt.getObject();
if (! first.isResource()) {
throw new TextIndexException("parameter specification must be an anon resource : " + first);
}
result.add(getParamSpec((Resource) first));
-
+
Statement restStmt = current.getProperty(RDF.rest);
if (restStmt == null) {
throw new TextIndexException("parameter list not terminated by rdf:nil");
}
-
+
RDFNode rest = restStmt.getObject();
if (! rest.isResource()) {
throw new TextIndexException("parameter list node is not a resource : " + rest);
}
-
+
current = (Resource) rest;
}
-
+
return result;
}
-
+
private ParamSpec getParamSpec(Resource node) {
Statement nameStmt = node.getProperty(TextVocab.pParamName);
Statement typeStmt = node.getProperty(TextVocab.pParamType);
Statement valueStmt = node.getProperty(TextVocab.pParamValue);
+ if (typeStmt == null) {
+ throw new TextIndexException("Parameter specification must have a text:paramType: " + node);
+ }
+ Resource typeRes = typeStmt.getResource();
+ String type = typeRes.getLocalName();
+
String name = getStringValue(nameStmt);
- String type = getStringValue(typeStmt);
String value = getStringValue(valueStmt);
switch (type) {
@@ -274,7 +282,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
return new ParamSpec(name, value, String.class);
}
-
+
// java.io.FileReader
case TYPE_FILE: {
@@ -291,23 +299,23 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
throw new TextIndexException("File " + value + " for param " + name + " not found!");
}
}
-
+
// org.apache.lucene.analysis.util.CharArraySet
case TYPE_SET: {
if (valueStmt == null) {
throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
}
-
+
RDFNode valueNode = valueStmt.getObject();
if (!valueNode.isResource()) {
throw new TextIndexException("A set param spec text:paramValue must be a list of strings: " + valueNode);
}
-
+
List<String> values = toStrings((Resource) valueNode);
return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
}
-
+
// int
case TYPE_INT:
if (value == null) {
@@ -317,7 +325,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
int n = ((Literal) valueStmt.getObject()).getInt();
return new ParamSpec(name, n, int.class);
- // boolean
+ // boolean
case TYPE_BOOL:
if (value == null) {
throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
@@ -325,21 +333,21 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
boolean b = ((Literal) valueStmt.getObject()).getBoolean();
return new ParamSpec(name, b, boolean.class);
-
- // org.apache.lucene.analysis.Analyzer
+
+ // org.apache.lucene.analysis.Analyzer
case TYPE_ANALYZER:
if (valueStmt == null) {
throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
}
-
+
RDFNode valueNode = valueStmt.getObject();
if (!valueNode.isResource()) {
throw new TextIndexException("Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
}
-
+
Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
return new ParamSpec(name, analyzer, Analyzer.class);
-
+
default:
// there was no match
Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
@@ -348,7 +356,7 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
return null;
}
-
+
private String getStringValue(Statement stmt) {
if (stmt == null) {
return null;
@@ -365,33 +373,33 @@ public class GenericAnalyzerAssembler extends AssemblerBase {
private List<String> toStrings(Resource list) {
List<String> result = new ArrayList<>();
Resource current = list;
-
+
while (current != null && ! current.equals(RDF.nil)){
Statement firstStmt = current.getProperty(RDF.first);
if (firstStmt == null) {
throw new TextIndexException("param spec of type set not well formed");
}
-
+
RDFNode first = firstStmt.getObject();
if (! first.isLiteral()) {
throw new TextIndexException("param spec of type set item is not a literal: " + first);
}
-
+
result.add(((Literal)first).getLexicalForm());
-
+
Statement restStmt = current.getProperty(RDF.rest);
if (restStmt == null) {
throw new TextIndexException("param spec of type set not terminated by rdf:nil");
}
-
+
RDFNode rest = restStmt.getObject();
if (! rest.isResource()) {
throw new TextIndexException("param spec of type set rest is not a resource: " + rest);
}
-
+
current = (Resource) rest;
}
-
+
return result;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index b051252..78cf0c0 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -89,6 +89,12 @@ public class TextVocab
//GenericAnalyzer
public static final Resource genericAnalyzer = Vocab.resource(NS, "GenericAnalyzer");
public static final Resource definedAnalyzer = Vocab.resource(NS, "DefinedAnalyzer");
+ public static final Resource typeAnalyzer = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_ANALYZER);
+ public static final Resource typeBoolean = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_BOOL);
+ public static final Resource typeFile = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_FILE);
+ public static final Resource typeInt = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_INT);
+ public static final Resource typeSet = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_SET);
+ public static final Resource typeString = Vocab.resource(NS, GenericAnalyzerAssembler.TYPE_STRING);
public static final Property pClass = Vocab.property(NS, "class");
public static final Property pParams = Vocab.property(NS, "params");
public static final Property pParamName = Vocab.property(NS, "paramName");
http://git-wip-us.apache.org/repos/asf/jena/blob/311efab2/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index 87c5d75..3effc39 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -67,13 +67,6 @@ public class TestGenericAnalyzerAssembler {
private static final String FILE_STOPS = "testing/some-stop-words.txt";
- private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
- private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
- private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
- private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
- private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
- private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
-
static {
TextAssembler.init();
Model model = ModelFactory.createDefaultModel();
@@ -106,7 +99,7 @@ public class TestGenericAnalyzerAssembler {
new RDFNode[] {
model.createResource()
.addProperty(TextVocab.pParamName, "stopWords")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_SET)
+ .addProperty(TextVocab.pParamType, TextVocab.typeSet)
.addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
}))
;
@@ -121,14 +114,14 @@ public class TestGenericAnalyzerAssembler {
new RDFNode[] {
model.createResource()
.addProperty(TextVocab.pParamName, "defaultAnalyzer")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+ .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
.addProperty(TextVocab.pParamValue,
model.createResource()
.addProperty(RDF.type, TextVocab.simpleAnalyzer)
),
model.createResource()
.addProperty(TextVocab.pParamName, "maxShingleSize")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+ .addProperty(TextVocab.pParamType, TextVocab.typeInt)
.addLiteral(TextVocab.pParamValue, 3)
}))
;
@@ -143,34 +136,34 @@ public class TestGenericAnalyzerAssembler {
new RDFNode[] {
model.createResource()
.addProperty(TextVocab.pParamName, "delegate")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+ .addProperty(TextVocab.pParamType, TextVocab.typeAnalyzer)
.addProperty(TextVocab.pParamValue,
model.createResource()
.addProperty(RDF.type, TextVocab.simpleAnalyzer)
) ,
model.createResource()
.addProperty(TextVocab.pParamName, "minShingleSize")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+ .addProperty(TextVocab.pParamType, TextVocab.typeInt)
.addLiteral(TextVocab.pParamValue, 2) ,
model.createResource()
.addProperty(TextVocab.pParamName, "maxShingleSize")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+ .addProperty(TextVocab.pParamType, TextVocab.typeInt)
.addLiteral(TextVocab.pParamValue, 4) ,
model.createResource()
.addProperty(TextVocab.pParamName, "tokenSeparator")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+ .addProperty(TextVocab.pParamType, TextVocab.typeString)
.addLiteral(TextVocab.pParamValue, "|") ,
model.createResource()
.addProperty(TextVocab.pParamName, "outputUnigrams")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+ .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
.addLiteral(TextVocab.pParamValue, false) ,
model.createResource()
.addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+ .addProperty(TextVocab.pParamType, TextVocab.typeBoolean)
.addLiteral(TextVocab.pParamValue, true) ,
model.createResource()
.addProperty(TextVocab.pParamName, "fillerToken")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+ .addProperty(TextVocab.pParamType, TextVocab.typeString)
.addLiteral(TextVocab.pParamValue, "foo")
}))
;
@@ -185,7 +178,7 @@ public class TestGenericAnalyzerAssembler {
new RDFNode[] {
model.createResource()
.addProperty(TextVocab.pParamName, "stopWords")
- .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+ .addProperty(TextVocab.pParamType, TextVocab.typeFile)
.addProperty(TextVocab.pParamValue, FILE_STOPS)
}))
;