You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:20 UTC
[11/18] jena git commit: factor DefinedAnalyzerAssembler and
DefineAnalyzersAssembler into separate classes;
move defined analyzer cache to Util alongside the language-tagged analyzers
since both caches have the same lifetime and similar uses.
factor DefinedAnalyzerAssembler and DefineAnalyzersAssembler into
separate classes; move defined analyzer cache to Util alongside the
language-tagged analyzers since both caches have the same lifetime and
similar uses.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/5edb6c87
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/5edb6c87
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/5edb6c87
Branch: refs/heads/master
Commit: 5edb6c8758124fe8dd5a96d7b92949fc3ac1f61f
Parents: 311efab
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 10:13:09 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 10:13:09 2017 -0500
----------------------------------------------------------------------
.../apache/jena/query/text/analyzer/Util.java | 12 +++
.../assembler/DefineAnalyzersAssembler.java | 105 +++++++++++++++++++
.../assembler/DefinedAnalyzerAssembler.java | 103 +++++-------------
.../assembler/TextIndexLuceneAssembler.java | 10 +-
4 files changed, 152 insertions(+), 78 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 20c7573..6ad0747 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -18,6 +18,7 @@
package org.apache.jena.query.text.analyzer;
+import org.apache.jena.rdf.model.Resource;
import org.apache.lucene.analysis.Analyzer;
import java.lang.reflect.Constructor;
import java.util.Hashtable;
@@ -26,6 +27,9 @@ public class Util {
private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+
+ // cache of defined text:defineAnalyzers
+ private static Hashtable<String, Analyzer> definedAnalyzers = new Hashtable<>();
static {
initAnalyzerDefs();
@@ -55,6 +59,14 @@ public class Util {
public static void addAnalyzer(String lang, Analyzer analyzer) {
cache.put(lang, analyzer);
}
+
+ public static Analyzer getDefinedAnalyzer(Resource key) {
+ return definedAnalyzers.get(key.getURI());
+ }
+
+ public static void defineAnalyzer(Resource key, Analyzer analyzer) {
+ definedAnalyzers.put(key.getURI(), analyzer);
+ }
private static void initAnalyzerDefs() {
analyzersClasses = new Hashtable<>();
http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
new file mode 100644
index 0000000..11270e2
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefineAnalyzersAssembler {
+ /*
+ <#indexLucene> a text:TextIndexLucene ;
+ text:directory <file:Lucene> ;
+ text:entityMap <#entMap> ;
+ text:defineAnalyzers (
+ [text:addLang "sa-x-iast" ;
+ text:analyzer [ . . . ]]
+ [text:defineAnalyzer <#foo> ;
+ text:analyzer [ . . . ]]
+ )
+ */
+
+ public static boolean open(Assembler a, Resource list) {
+ Resource current = list;
+ boolean isMultilingualSupport = false;
+
+ while (current != null && ! current.equals(RDF.nil)){
+ Statement firstStmt = current.getProperty(RDF.first);
+ if (firstStmt == null) {
+ throw new TextIndexException("parameter list not well formed: " + current);
+ }
+
+ RDFNode first = firstStmt.getObject();
+ if (! first.isResource()) {
+ throw new TextIndexException("parameter specification must be an anon resource : " + first);
+ }
+
+ // process the current list element to add an analyzer
+ Resource adding = (Resource) first;
+ if (adding.hasProperty(TextVocab.pAnalyzer)) {
+ Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
+ RDFNode analyzerNode = analyzerStmt.getObject();
+ if (!analyzerNode.isResource()) {
+ throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+ }
+
+ Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
+
+ if (adding.hasProperty(TextVocab.pAddLang)) {
+ Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+ String langCode = langStmt.getString();
+ Util.addAnalyzer(langCode, analyzer);
+ isMultilingualSupport = true;
+ }
+
+ if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
+ Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
+ Resource id = defStmt.getResource();
+
+ if (id.getURI() != null) {
+ Util.defineAnalyzer(id, analyzer);
+ } else {
+ throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+ }
+ }
+ } else {
+ throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+ }
+
+ Statement restStmt = current.getProperty(RDF.rest);
+ if (restStmt == null) {
+ throw new TextIndexException("parameter list not terminated by rdf:nil");
+ }
+
+ RDFNode rest = restStmt.getObject();
+ if (! rest.isResource()) {
+ throw new TextIndexException("parameter list node is not a resource : " + rest);
+ }
+
+ current = (Resource) rest;
+ }
+
+ return isMultilingualSupport;
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
index e7bd941..e6909ac 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -1,88 +1,39 @@
-package org.apache.jena.query.text.assembler;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-import java.util.Hashtable;
+package org.apache.jena.query.text.assembler;
import org.apache.jena.assembler.Assembler;
import org.apache.jena.assembler.Mode;
import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.query.text.TextIndexException;
import org.apache.jena.query.text.analyzer.Util;
-import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
-import org.apache.jena.vocabulary.RDF;
-import org.apache.lucene.analysis.Analyzer;
public class DefinedAnalyzerAssembler extends AssemblerBase {
-
- private static Hashtable<Resource, Analyzer> analyzers = new Hashtable<>();
-
- public static void addAnalyzer(Resource key, Analyzer analyzer) {
- analyzers.put(key, analyzer);
- }
-
- public static boolean addAnalyzers(Assembler a, Resource list) {
- Resource current = list;
- boolean isMultilingualSupport = false;
-
- while (current != null && ! current.equals(RDF.nil)){
- Statement firstStmt = current.getProperty(RDF.first);
- if (firstStmt == null) {
- throw new TextIndexException("parameter list not well formed: " + current);
- }
-
- RDFNode first = firstStmt.getObject();
- if (! first.isResource()) {
- throw new TextIndexException("parameter specification must be an anon resource : " + first);
- }
-
- // process the current list element to add an analyzer
- Resource adding = (Resource) first;
- if (adding.hasProperty(TextVocab.pAnalyzer)) {
- Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
- RDFNode analyzerNode = analyzerStmt.getObject();
- if (!analyzerNode.isResource()) {
- throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
- }
-
- Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
-
- if (adding.hasProperty(TextVocab.pAddLang)) {
- Statement langStmt = adding.getProperty(TextVocab.pAddLang);
- String langCode = langStmt.getString();
- Util.addAnalyzer(langCode, analyzer);
- isMultilingualSupport = true;
- }
-
- if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
- Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
- Resource id = defStmt.getResource();
-
- if (id.getURI() != null) {
- DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
- } else {
- throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
- }
- }
- } else {
- throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
- }
-
- Statement restStmt = current.getProperty(RDF.rest);
- if (restStmt == null) {
- throw new TextIndexException("parameter list not terminated by rdf:nil");
- }
-
- RDFNode rest = restStmt.getObject();
- if (! rest.isResource()) {
- throw new TextIndexException("parameter list node is not a resource : " + rest);
- }
-
- current = (Resource) rest;
- }
-
- return isMultilingualSupport;
- }
+ /*
+ text:map (
+ [ text:field "text" ;
+ text:predicate rdfs:label;
+ text:analyzer [
+ a text:DefinedAnalyzer ;
+ text:useAnalyzer <#Foo> ]
+ */
@Override
public Object open(Assembler a, Resource root, Mode mode) {
@@ -91,7 +42,7 @@ public class DefinedAnalyzerAssembler extends AssemblerBase {
Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
Resource key = useStmt.getResource();
- return analyzers.get(key);
+ return Util.getDefinedAnalyzer(key);
}
return null;
http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 7acfb9e..14af9bf 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -26,6 +26,7 @@ import org.apache.jena.assembler.Mode ;
import org.apache.jena.assembler.assemblers.AssemblerBase ;
import org.apache.jena.atlas.io.IO ;
import org.apache.jena.atlas.lib.IRILib ;
+import org.apache.jena.atlas.logging.Log;
import org.apache.jena.query.text.*;
import org.apache.jena.rdf.model.RDFNode ;
import org.apache.jena.rdf.model.Resource ;
@@ -120,10 +121,15 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
if (null != defAnalyzersStatement) {
RDFNode aNode = defAnalyzersStatement.getObject();
if (! aNode.isResource()) {
- throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+ throw new TextIndexException("text:defineAnalyzers property is not a resource (list) : " + aNode);
}
- boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+ boolean addedLangs = DefineAnalyzersAssembler.open(a, (Resource) aNode);
+ // if the text:defineAnalyzers added any analyzers to lang tags then ensure that
+ // multilingual support is enabled
if (addedLangs) {
+ if (!isMultilingualSupport) {
+ Log.warn(this, "Multilingual support implicitly enabled by text:defineAnalyzers");
+ }
isMultilingualSupport = true;
}
}