You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:20 UTC

[11/18] jena git commit: factor DefinedAnalyzerAssembler and DefineAnalyzersAssembler into separate classes; move defined analyzer cache to Util alongside the language tagged analyzers since both caches have the same lifetime and similar uses.

factor DefinedAnalyzerAssembler and DefineAnalyzersAssembler into
separate classes; move defined analyzer cache to Util alongside the
language tagged analyzers since both caches have the same lifetime and
similar uses.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/5edb6c87
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/5edb6c87
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/5edb6c87

Branch: refs/heads/master
Commit: 5edb6c8758124fe8dd5a96d7b92949fc3ac1f61f
Parents: 311efab
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sun Apr 23 10:13:09 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sun Apr 23 10:13:09 2017 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/analyzer/Util.java   |  12 +++
 .../assembler/DefineAnalyzersAssembler.java     | 105 +++++++++++++++++++
 .../assembler/DefinedAnalyzerAssembler.java     | 103 +++++-------------
 .../assembler/TextIndexLuceneAssembler.java     |  10 +-
 4 files changed, 152 insertions(+), 78 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 20c7573..6ad0747 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -18,6 +18,7 @@
 
 package org.apache.jena.query.text.analyzer;
 
+import org.apache.jena.rdf.model.Resource;
 import org.apache.lucene.analysis.Analyzer;
 import java.lang.reflect.Constructor;
 import java.util.Hashtable;
@@ -26,6 +27,9 @@ public class Util {
 
     private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
     private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+    
+    // cache of defined text:defineAnalyzers
+    private static Hashtable<String, Analyzer> definedAnalyzers = new Hashtable<>();
 
     static {
         initAnalyzerDefs();
@@ -55,6 +59,14 @@ public class Util {
     public static void addAnalyzer(String lang, Analyzer analyzer) {
         cache.put(lang, analyzer);
     }
+    
+    public static Analyzer getDefinedAnalyzer(Resource key) {
+        return definedAnalyzers.get(key.getURI());
+    }
+    
+    public static void defineAnalyzer(Resource key, Analyzer analyzer) {
+        definedAnalyzers.put(key.getURI(), analyzer);
+    }
 
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();

http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
new file mode 100644
index 0000000..11270e2
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefineAnalyzersAssembler {
+    /*
+    <#indexLucene> a text:TextIndexLucene ;
+        text:directory <file:Lucene> ;
+        text:entityMap <#entMap> ;
+        text:defineAnalyzers (
+            [text:addLang "sa-x-iast" ;
+             text:analyzer [ . . . ]]
+            [text:defineAnalyzer <#foo> ;
+             text:analyzer [ . . . ]]
+        )
+    */
+
+    public static boolean open(Assembler a, Resource list) {
+        Resource current = list;
+        boolean isMultilingualSupport = false;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("parameter list not well formed: " + current);
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isResource()) {
+                throw new TextIndexException("parameter specification must be an anon resource : " + first);
+            }
+
+            // process the current list element to add an analyzer 
+            Resource adding = (Resource) first;
+            if (adding.hasProperty(TextVocab.pAnalyzer)) {
+                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
+                RDFNode analyzerNode = analyzerStmt.getObject();
+                if (!analyzerNode.isResource()) {
+                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+                }
+                
+                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
+                
+                if (adding.hasProperty(TextVocab.pAddLang)) {
+                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+                    String langCode = langStmt.getString();
+                    Util.addAnalyzer(langCode, analyzer);
+                    isMultilingualSupport = true;
+                }
+                
+                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
+                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
+                    Resource id = defStmt.getResource();
+                    
+                    if (id.getURI() != null) {
+                        Util.defineAnalyzer(id, analyzer);
+                    } else {
+                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+                    }
+                }
+            } else {
+                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+            }
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("parameter list not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("parameter list node is not a resource : " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return isMultilingualSupport;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
index e7bd941..e6909ac 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -1,88 +1,39 @@
-package org.apache.jena.query.text.assembler;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
-import java.util.Hashtable;
+package org.apache.jena.query.text.assembler;
 
 import org.apache.jena.assembler.Assembler;
 import org.apache.jena.assembler.Mode;
 import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.query.text.TextIndexException;
 import org.apache.jena.query.text.analyzer.Util;
-import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.Statement;
-import org.apache.jena.vocabulary.RDF;
-import org.apache.lucene.analysis.Analyzer;
 
 public class DefinedAnalyzerAssembler extends AssemblerBase {
-    
-    private static Hashtable<Resource, Analyzer> analyzers = new Hashtable<>();
-    
-    public static void addAnalyzer(Resource key, Analyzer analyzer) {
-        analyzers.put(key, analyzer);
-    }
-    
-    public static boolean addAnalyzers(Assembler a, Resource list) {
-        Resource current = list;
-        boolean isMultilingualSupport = false;
-        
-        while (current != null && ! current.equals(RDF.nil)){
-            Statement firstStmt = current.getProperty(RDF.first);
-            if (firstStmt == null) {
-                throw new TextIndexException("parameter list not well formed: " + current);
-            }
-            
-            RDFNode first = firstStmt.getObject();
-            if (! first.isResource()) {
-                throw new TextIndexException("parameter specification must be an anon resource : " + first);
-            }
-
-            // process the current list element to add an analyzer 
-            Resource adding = (Resource) first;
-            if (adding.hasProperty(TextVocab.pAnalyzer)) {
-                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
-                RDFNode analyzerNode = analyzerStmt.getObject();
-                if (!analyzerNode.isResource()) {
-                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
-                }
-                
-                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
-                
-                if (adding.hasProperty(TextVocab.pAddLang)) {
-                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
-                    String langCode = langStmt.getString();
-                    Util.addAnalyzer(langCode, analyzer);
-                    isMultilingualSupport = true;
-                }
-                
-                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
-                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
-                    Resource id = defStmt.getResource();
-                    
-                    if (id.getURI() != null) {
-                        DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
-                    } else {
-                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
-                    }
-                }
-            } else {
-                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
-            }
-            
-            Statement restStmt = current.getProperty(RDF.rest);
-            if (restStmt == null) {
-                throw new TextIndexException("parameter list not terminated by rdf:nil");
-            }
-            
-            RDFNode rest = restStmt.getObject();
-            if (! rest.isResource()) {
-                throw new TextIndexException("parameter list node is not a resource : " + rest);
-            }
-            
-            current = (Resource) rest;
-        }
-        
-        return isMultilingualSupport;
-    }
+    /*
+    text:map (
+         [ text:field "text" ; 
+           text:predicate rdfs:label;
+           text:analyzer [
+               a text:DefinedAnalyzer ;
+               text:useAnalyzer <#Foo> ]
+     */
    
     @Override
     public Object open(Assembler a, Resource root, Mode mode) {
@@ -91,7 +42,7 @@ public class DefinedAnalyzerAssembler extends AssemblerBase {
             Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
             Resource key = useStmt.getResource();
             
-            return analyzers.get(key);
+            return Util.getDefinedAnalyzer(key);
         }
         
         return null;

http://git-wip-us.apache.org/repos/asf/jena/blob/5edb6c87/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 7acfb9e..14af9bf 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -26,6 +26,7 @@ import org.apache.jena.assembler.Mode ;
 import org.apache.jena.assembler.assemblers.AssemblerBase ;
 import org.apache.jena.atlas.io.IO ;
 import org.apache.jena.atlas.lib.IRILib ;
+import org.apache.jena.atlas.logging.Log;
 import org.apache.jena.query.text.*;
 import org.apache.jena.rdf.model.RDFNode ;
 import org.apache.jena.rdf.model.Resource ;
@@ -120,10 +121,15 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
             if (null != defAnalyzersStatement) {
                 RDFNode aNode = defAnalyzersStatement.getObject();
                 if (! aNode.isResource()) {
-                    throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+                    throw new TextIndexException("text:defineAnalyzers property is not a resource (list) : " + aNode);
                 }
-                boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+                boolean addedLangs = DefineAnalyzersAssembler.open(a, (Resource) aNode);
+                // if the text:defineAnalyzers added any analyzers to lang tags then ensure that
+                // multilingual support is enabled
                 if (addedLangs) {
+                    if (!isMultilingualSupport) {
+                        Log.warn(this,  "Multilingual support implicitly enabled by text:defineAnalyzers");
+                    }
                     isMultilingualSupport = true;
                 }
             }