You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:18 UTC

[09/18] jena git commit: added analyzer definitions: 1) DefinedAnalyzers for use in text:map; 2) add analyzers to Multilingual support based on BCP47 codes

Added analyzer definitions: 1) DefinedAnalyzers for use in text:map; 2)
analyzers added to multilingual support, keyed by BCP 47 language codes

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/a3bb8e41
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/a3bb8e41
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/a3bb8e41

Branch: refs/heads/master
Commit: a3bb8e41aeaf9be3540cf0a6be84cd9dc9b43b28
Parents: 57ded6a
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 16:31:54 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 16:31:54 2017 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/analyzer/Util.java   |   4 +
 .../assembler/DefinedAnalyzerAssembler.java     | 100 +++++++++++++++++++
 .../query/text/assembler/TextAssembler.java     |   1 +
 .../assembler/TextIndexLuceneAssembler.java     |  12 +++
 .../jena/query/text/assembler/TextVocab.java    |   5 +
 5 files changed, 122 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index fb2582a..20c7573 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -51,6 +51,10 @@ public class Util {
             return null;
         }
     }
+    
+    public static void addAnalyzer(String lang, Analyzer analyzer) {
+        cache.put(lang, analyzer);
+    }
 
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
new file mode 100644
index 0000000..e7bd941
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefinedAnalyzerAssembler.java
@@ -0,0 +1,100 @@
+package org.apache.jena.query.text.assembler;
+
+import java.util.Hashtable;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.analyzer.Util;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.rdf.model.Statement;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.lucene.analysis.Analyzer;
+
+public class DefinedAnalyzerAssembler extends AssemblerBase {
+    
+    private static Hashtable<Resource, Analyzer> analyzers = new Hashtable<>();
+    
+    public static void addAnalyzer(Resource key, Analyzer analyzer) {
+        analyzers.put(key, analyzer);
+    }
+    
+    public static boolean addAnalyzers(Assembler a, Resource list) {
+        Resource current = list;
+        boolean isMultilingualSupport = false;
+        
+        while (current != null && ! current.equals(RDF.nil)){
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException("parameter list not well formed: " + current);
+            }
+            
+            RDFNode first = firstStmt.getObject();
+            if (! first.isResource()) {
+                throw new TextIndexException("parameter specification must be an anon resource : " + first);
+            }
+
+            // process the current list element to add an analyzer 
+            Resource adding = (Resource) first;
+            if (adding.hasProperty(TextVocab.pAnalyzer)) {
+                Statement analyzerStmt = adding.getProperty(TextVocab.pAnalyzer);
+                RDFNode analyzerNode = analyzerStmt.getObject();
+                if (!analyzerNode.isResource()) {
+                    throw new TextIndexException("addAnalyzers text:analyzer must be an analyzer spec resource: " + analyzerNode);
+                }
+                
+                Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
+                
+                if (adding.hasProperty(TextVocab.pAddLang)) {
+                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+                    String langCode = langStmt.getString();
+                    Util.addAnalyzer(langCode, analyzer);
+                    isMultilingualSupport = true;
+                }
+                
+                if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
+                    Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
+                    Resource id = defStmt.getResource();
+                    
+                    if (id.getURI() != null) {
+                        DefinedAnalyzerAssembler.addAnalyzer(id, analyzer);
+                    } else {
+                        throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
+                    }
+                }
+            } else {
+                throw new TextIndexException("text:analyzer property is required when adding an analyzer: " + adding);
+            }
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException("parameter list not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException("parameter list node is not a resource : " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+        
+        return isMultilingualSupport;
+    }
+   
+    @Override
+    public Object open(Assembler a, Resource root, Mode mode) {
+        
+        if (root.hasProperty(TextVocab.pUseAnalyzer)) {
+            Statement useStmt = root.getProperty(TextVocab.pUseAnalyzer);
+            Resource key = useStmt.getResource();
+            
+            return analyzers.get(key);
+        }
+        
+        return null;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 45f5cee..6cbb2da 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -38,6 +38,7 @@ public class TextAssembler
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new ConfigurableAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.genericAnalyzer,  new GenericAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.definedAnalyzer,  new DefinedAnalyzerAssembler()) ;
 
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 0ec1e5b..7acfb9e 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -115,6 +115,18 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
                 }
                 isMultilingualSupport = mlsNode.asLiteral().getBoolean();
             }
+            
+            Statement defAnalyzersStatement = root.getProperty(pDefAnalyzers);
+            if (null != defAnalyzersStatement) {
+                RDFNode aNode = defAnalyzersStatement.getObject();
+                if (! aNode.isResource()) {
+                    throw new TextIndexException("text:defineAnalyzers property is not a resource : " + aNode);
+                }
+                boolean addedLangs = DefinedAnalyzerAssembler.addAnalyzers(a, (Resource) aNode);
+                if (addedLangs) {
+                    isMultilingualSupport = true;
+                }
+            }
 
             boolean storeValues = false;
             Statement storeValuesStatement = root.getProperty(pStoreValues);

http://git-wip-us.apache.org/repos/asf/jena/blob/a3bb8e41/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index cd1844d..b051252 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -88,10 +88,15 @@ public class TextVocab
 
     //GenericAnalyzer
     public static final Resource genericAnalyzer    = Vocab.resource(NS, "GenericAnalyzer");
+    public static final Resource definedAnalyzer    = Vocab.resource(NS, "DefinedAnalyzer");
     public static final Property pClass             = Vocab.property(NS, "class");
     public static final Property pParams            = Vocab.property(NS, "params");
     public static final Property pParamName         = Vocab.property(NS, "paramName");
     public static final Property pParamType         = Vocab.property(NS, "paramType");
     public static final Property pParamValue        = Vocab.property(NS, "paramValue");
+    public static final Property pDefAnalyzers      = Vocab.property(NS, "defineAnalyzers");
+    public static final Property pDefAnalyzer       = Vocab.property(NS, "defineAnalyzer");
+    public static final Property pAddLang           = Vocab.property(NS, "addLang");
+    public static final Property pUseAnalyzer       = Vocab.property(NS, "useAnalyzer");
 }