You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:18 UTC
[09/18] jena git commit: added analyzer definitions: 1)
DefinedAnalyzers for use in text:map;
2) add analyzers to Multilingual support based on BCP47 codes
added analyzer definitions: 1) DefinedAnalyzers for use in text:map; 2)
add analyzers to Multilingual support based on BCP47 codes
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/a3bb8e41
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/a3bb8e41
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/a3bb8e41
Branch: refs/heads/master
Commit: a3bb8e41aeaf9be3540cf0a6be84cd9dc9b43b28
Parents: 57ded6a
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 16:31:54 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 16:31:54 2017 -0500
----------------------------------------------------------------------
.../apache/jena/query/text/analyzer/Util.java | 4 +
.../assembler/DefinedAnalyzerAssembler.java | 100 +++++++++++++++++++
.../query/text/assembler/TextAssembler.java | 1 +
.../assembler/TextIndexLuceneAssembler.java | 12 +++
.../jena/query/text/assembler/TextVocab.java | 5 +
5 files changed, 122 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index fb2582a..20c7573 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -51,6 +51,10 @@ public class Util {
return null;
}
}
+
+ public static void addAnalyzer(String lang, Analyzer analyzer) { // registers analyzer in the cache under the language key lang
+ cache.put(lang, analyzer); // presumably replaces any entry already stored for lang; cache's declaration is not visible here - TODO confirm
+ } // end addAnalyzer
private static void initAnalyzerDefs() {
analyzersClasses = new Hashtable<>();
http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
new file mode 100644
index 0000000..e7bd941
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -0,0 +1,100 @@
+package org.apache.jena.query.text.assembler;
+
+import java.util.Hashtable;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefinedAnalyzerAssembler extends AssemblerBase { // registers analyzers under resource keys and hands them back via text:useAnalyzer
+
+ private static Hashtable<Resource, Analyzer> analyzers = new Hashtable<>(); // static registry shared by all instances: defining resource -> analyzer
+
+ public static void addAnalyzer(Resource key, Analyzer analyzer) { // stores analyzer under key in the static registry
+ analyzers.put(key, analyzer); // Hashtable.put replaces an earlier entry for key and throws NullPointerException on a null key or value
+ }
+
+ public static boolean addAnalyzers(Assembler a, Resource list) { // walks an RDF list of analyzer definitions; returns true if any element carried text:addLang
+ Resource current = list; // current list cell
+ boolean isMultilingualSupport = false; // set once any element registers a language analyzer
+
+ while (current != null && ! current.equals(RDF.nil)){ // follow rdf:first / rdf:rest cells until rdf:nil
+ Statement firstStmt = current.getProperty(RDF.first); // the element held by this cell
+ if (firstStmt == null) { // a cell without rdf:first is rejected
+ throw new TextIndexException("parameter list not well formed: " + current);
+ }
+
+ RDFNode first = firstStmt.getObject();
+ if (! first.isResource()) { // elements must be resources, not literals
+ throw new TextIndexException("parameter specification must be an anon resource : " + first);
+ }
+
+ // process the current list element to add an analyzer
+ Resource adding = (Resource) first; // safe: isResource() was checked above
+ if (adding.hasProperty(TextVocab.pAnalyzer)) { // text:analyzer is mandatory; the else branch below throws
+ Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
+ RDFNode analyzerNode = analyzerStmt.getObject();
+ if (!analyzerNode.isResource()) { // the analyzer spec itself must be a resource
+ throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+ }
+
+ Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode); // NOTE(review): the cast assumes the spec assembles to a Lucene Analyzer - confirm
+
+ if (adding.hasProperty(TextVocab.pAddLang)) { // optional: also register the analyzer under a language code
+ Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+ String langCode = langStmt.getString(); // presumably fails if the object is not a literal - TODO confirm
+ Util.addAnalyzer(langCode, analyzer); // hands the analyzer to Util, keyed by the language code
+ isMultilingualSupport = true; // reported to the caller through the return value
+ }
+
+ if (adding.hasProperty(TextVocab.pDefAnalyzer)) { // optional: register under a named resource for later lookup by open()
+ Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
+ Resource id = defStmt.getResource(); // key under which the analyzer is registered
+
+ if (id.getURI() != null) { // only resources with a URI are accepted as keys
+ DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
+ } else {
+ throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+ }
+ }
+ } else {
+ throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+ }
+
+ Statement restStmt = current.getProperty(RDF.rest); // link to the next list cell
+ if (restStmt == null) { // a cell without rdf:rest means the list never reached rdf:nil
+ throw new TextIndexException("parameter list not terminated by rdf:nil");
+ }
+
+ RDFNode rest = restStmt.getObject();
+ if (! rest.isResource()) { // the tail must be another cell or rdf:nil, never a literal
+ throw new TextIndexException("parameter list node is not a resource : " + rest);
+ }
+
+ current = (Resource) rest; // advance to the next cell
+ }
+
+ return isMultilingualSupport; // true only if at least one element had text:addLang
+ }
+
+ @Override
+ public Object open(Assembler a, Resource root, Mode mode) { // resolves root's text:useAnalyzer to a registered analyzer; a and mode are unused
+
+ if (root.hasProperty(TextVocab.pUseAnalyzer)) { // lookup is attempted only when text:useAnalyzer is present
+ Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
+ Resource key = useStmt.getResource(); // registry key named by text:useAnalyzer
+
+ return analyzers.get(key); // null when nothing was registered for key
+ }
+
+ return null; // root has no text:useAnalyzer property
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 45f5cee..6cbb2da 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -38,6 +38,7 @@ public class TextAssembler
Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.genericAnalyzer, new GenericAnalyzerAssembler()) ;
+ Assembler.general.implementWith(TextVocab.definedAnalyzer, new DefinedAnalyzerAssembler()) ;
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 0ec1e5b..7acfb9e 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -115,6 +115,18 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
}
isMultilingualSupport = mlsNode.asLiteral().getBoolean();
}
+
+ Statement defAnalyzersStatement = root.getProperty(pDefAnalyzers);
+ if (null != defAnalyzersStatement) {
+ RDFNode aNode = defAnalyzersStatement.getObject();
+ if (! aNode.isResource()) {
+ throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+ }
+ boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+ if (addedLangs) {
+ isMultilingualSupport = true;
+ }
+ }
boolean storeValues = false;
Statement storeValuesStatement = root.getProperty(pStoreValues);
http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index cd1844d..b051252 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -88,10 +88,15 @@ public class TextVocab
//GenericAnalyzer
public static final Resource genericAnalyzer = Vocab.resource(NS, "GenericAnalyzer");
+ public static final Resource definedAnalyzer = Vocab.resource(NS, "DefinedAnalyzer");
public static final Property pClass = Vocab.property(NS, "class");
public static final Property pParams = Vocab.property(NS, "params");
public static final Property pParamName = Vocab.property(NS, "paramName");
public static final Property pParamType = Vocab.property(NS, "paramType");
public static final Property pParamValue = Vocab.property(NS, "paramValue");
+ public static final Property pDefAnalyzers = Vocab.property(NS, "defineAnalyzers");
+ public static final Property pDefAnalyzer = Vocab.property(NS, "defineAnalyzer");
+ public static final Property pAddLang = Vocab.property(NS, "addLang");
+ public static final Property pUseAnalyzer = Vocab.property(NS, "useAnalyzer");
}