You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by co...@apache.org on 2018/06/20 20:59:59 UTC

[1/6] jena git commit: JENA-1556 implementation

Repository: jena
Updated Branches:
  refs/heads/master 3e999d55d -> fe9bdefa4


JENA-1556 implementation

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0d07ca90
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0d07ca90
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0d07ca90

Branch: refs/heads/master
Commit: 0d07ca904f9495f5a320faf5b7dd761d5f7294ab
Parents: f0b0522
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Jun 13 13:13:15 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Jun 13 13:13:15 2018 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/TextIndexLucene.java | 156 ++++++++++++-------
 .../analyzer/IndexingMultilingualAnalyzer.java  |  61 ++++++++
 .../text/analyzer/MultilingualAnalyzer.java     |   7 +-
 .../analyzer/QueryMultilingualAnalyzer.java     |  76 +++++++++
 .../apache/jena/query/text/analyzer/Util.java   |  51 +++++-
 .../assembler/DefineAnalyzersAssembler.java     |  82 +++++++++-
 .../jena/query/text/assembler/TextVocab.java    |   4 +
 7 files changed, 371 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 3eacfbe..cd4d63f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -31,7 +31,10 @@ import org.apache.jena.datatypes.TypeMapper ;
 import org.apache.jena.datatypes.xsd.XSDDatatype ;
 import org.apache.jena.graph.Node ;
 import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.query.text.analyzer.IndexingMultilingualAnalyzer;
 import org.apache.jena.query.text.analyzer.MultilingualAnalyzer;
+import org.apache.jena.query.text.analyzer.QueryMultilingualAnalyzer;
+import org.apache.jena.query.text.analyzer.Util;
 import org.apache.jena.sparql.util.NodeFactoryExtra ;
 import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.analysis.TokenStream;
@@ -85,14 +88,16 @@ public class TextIndexLucene implements TextIndex {
         ftIRI = new FieldType() ;
         ftIRI.setTokenized(false) ;
         ftIRI.setStored(true) ;
-		ftIRI.setIndexOptions(IndexOptions.DOCS);
+        ftIRI.setIndexOptions(IndexOptions.DOCS);
         ftIRI.freeze() ;
     }
     public static final FieldType  ftString = StringField.TYPE_NOT_STORED ;
 
     private final EntityDefinition docDef ;
     private final Directory        directory ;
-    private final Analyzer         analyzer ;
+    private final Analyzer         indexAnalyzer ;
+    private       Analyzer         defaultAnalyzer ;
+    private       Map<String, Analyzer> analyzerPerField;
     private final Analyzer         queryAnalyzer ;
     private final String           queryParserType ;
     private final FieldType        ftText ;
@@ -122,7 +127,7 @@ public class TextIndexLucene implements TextIndex {
 
         // create the analyzer as a wrapper that uses KeywordAnalyzer for
         // entity and graph fields and the configured analyzer(s) for all other
-        Map<String, Analyzer> analyzerPerField = new HashMap<>() ;
+        analyzerPerField = new HashMap<>() ;
         analyzerPerField.put(docDef.getEntityField(), new KeywordAnalyzer()) ;
         if ( docDef.getGraphField() != null )
             analyzerPerField.put(docDef.getGraphField(), new KeywordAnalyzer()) ;
@@ -136,12 +141,17 @@ public class TextIndexLucene implements TextIndex {
             }
         }
 
-        Analyzer defaultAnalyzer = (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer();
-        if (this.isMultilingual)
-            defaultAnalyzer = new MultilingualAnalyzer(defaultAnalyzer);
-        this.analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerPerField) ;
-        this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
+        defaultAnalyzer = (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer();
+        Analyzer indexDefault = defaultAnalyzer;
+        Analyzer queryDefault = defaultAnalyzer;
+        if (this.isMultilingual) {
+            queryDefault = new MultilingualAnalyzer(defaultAnalyzer);
+            indexDefault = Util.usingIndexAnalyzers() ? new IndexingMultilingualAnalyzer(defaultAnalyzer) : queryDefault;
+        }
+        this.indexAnalyzer = new PerFieldAnalyzerWrapper(indexDefault, analyzerPerField) ;
+        this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : new PerFieldAnalyzerWrapper(queryDefault, analyzerPerField) ;
         this.queryParserType = config.getQueryParser() ;
+        log.debug("TextIndexLucene defaultAnalyzer: {}, indexAnalyzer: {}, queryAnalyzer: {}, queryParserType: {}", defaultAnalyzer, indexAnalyzer, queryAnalyzer, queryParserType);
         this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ;
         if (config.isValueStored() && docDef.getLangField() == null)
             log.warn("Values stored but langField not set. Returned values will not have language tag or datatype.");
@@ -150,7 +160,7 @@ public class TextIndexLucene implements TextIndex {
     }
 
     private void openIndexWriter() {
-        IndexWriterConfig wConfig = new IndexWriterConfig(analyzer) ;
+        IndexWriterConfig wConfig = new IndexWriterConfig(indexAnalyzer) ;
         try
         {
             indexWriter = new IndexWriter(directory, wConfig) ;
@@ -158,12 +168,12 @@ public class TextIndexLucene implements TextIndex {
             indexWriter.commit();
         }
         catch (IndexFormatTooOldException e) {
-        	throw new TextIndexException("jena-text/Lucene cannot use indexes created before Jena 3.3.0. "
-        		+ "Please rebuild your text index using jena.textindexer from Jena 3.3.0 or above.", e);
+            throw new TextIndexException("jena-text/Lucene cannot use indexes created before Jena 3.3.0. "
+                + "Please rebuild your text index using jena.textindexer from Jena 3.3.0 or above.", e);
         }
         catch (IOException e)
         {
-            throw new TextIndexException(e) ;
+            throw new TextIndexException("openIndexWriter", e) ;
         }
     }
 
@@ -172,7 +182,7 @@ public class TextIndexLucene implements TextIndex {
     }
 
     public Analyzer getAnalyzer() {
-        return analyzer ;
+        return indexAnalyzer ;
     }
 
     public Analyzer getQueryAnalyzer() {
@@ -189,7 +199,7 @@ public class TextIndexLucene implements TextIndex {
             indexWriter.prepareCommit();
         }
         catch (IOException e) {
-            throw new TextIndexException(e);
+            throw new TextIndexException("prepareCommit", e);
         }
     }
 
@@ -199,7 +209,7 @@ public class TextIndexLucene implements TextIndex {
             indexWriter.commit();
         }
         catch (IOException e) {
-            throw new TextIndexException(e);
+            throw new TextIndexException("commit", e);
         }
     }
 
@@ -211,7 +221,7 @@ public class TextIndexLucene implements TextIndex {
             idx.rollback();
         }
         catch (IOException e) {
-            throw new TextIndexException(e);
+            throw new TextIndexException("rollback", e);
         }
 
         // The rollback will close the indexWriter, so we need to reopen it
@@ -224,7 +234,7 @@ public class TextIndexLucene implements TextIndex {
             indexWriter.close() ;
         }
         catch (IOException ex) {
-            throw new TextIndexException(ex) ;
+            throw new TextIndexException("close", ex) ;
         }
     }
 
@@ -237,7 +247,7 @@ public class TextIndexLucene implements TextIndex {
         try {
             updateDocument(entity);
         } catch (IOException e) {
-            throw new TextIndexException(e) ;
+            throw new TextIndexException("updateEntity", e) ;
         }
     }
 
@@ -259,7 +269,7 @@ public class TextIndexLucene implements TextIndex {
             addDocument(entity);
         }
         catch (IOException e) {
-            throw new TextIndexException(e) ;
+            throw new TextIndexException("addEntity", e) ;
         }
     }
 
@@ -288,7 +298,7 @@ public class TextIndexLucene implements TextIndex {
             indexWriter.deleteDocuments(uid);
 
         } catch (Exception e) {
-            throw new TextIndexException(e) ;
+            throw new TextIndexException("deleteEntity", e) ;
         }
     }
 
@@ -316,6 +326,13 @@ public class TextIndexLucene implements TextIndex {
                     if (this.isMultilingual) {
                         // add a field that uses a language-specific analyzer via MultilingualAnalyzer
                         doc.add(new Field(e.getKey() + "_" + lang, (String) e.getValue(), ftText));
+                        // add fields for any defined auxiliary indexes
+                        List<String> auxIndexes = Util.getAuxIndexes(lang);
+                        if (auxIndexes != null) {
+                            for (String auxTag : auxIndexes) {
+                                doc.add(new Field(e.getKey() + "_" + auxTag, (String) e.getValue(), ftText));
+                            }
+                        }
                     }
                 } else if (datatype != null && !datatype.equals(XSDDatatype.XSDstring)) {
                     // for non-string and non-langString datatypes, store the datatype in langField
@@ -342,7 +359,7 @@ public class TextIndexLucene implements TextIndex {
             return x.get(0) ;
         }
         catch (Exception ex) {
-            throw new TextIndexException(ex) ;
+            throw new TextIndexException("get", ex) ;
         }
     }
 
@@ -416,7 +433,8 @@ public class TextIndexLucene implements TextIndex {
             throw new TextIndexParseException(qs, ex.getMessage()) ;
         }
         catch (Exception ex) {
-            throw new TextIndexException(ex) ;
+            ex.printStackTrace(); // TEMPORARY 
+            throw new TextIndexException("query", ex) ;
         }
     }
 
@@ -514,7 +532,7 @@ public class TextIndexLucene implements TextIndex {
         return rez;
     }
     
-    private List<TextHit> highlightResults(ScoreDoc[] sDocs, IndexSearcher indexSearcher, Query query, String field, String highlight) 
+    private List<TextHit> highlightResults(ScoreDoc[] sDocs, IndexSearcher indexSearcher, Query query, String field, String highlight, boolean useDocLang) 
             throws IOException, InvalidTokenOffsetsException { 
         List<TextHit> results = new ArrayList<>() ;
         
@@ -526,14 +544,15 @@ public class TextIndexLucene implements TextIndex {
 
         for ( ScoreDoc sd : sDocs ) {
             Document doc = indexSearcher.doc(sd.doc) ;
-            log.trace("highlightResults[{}]: {}", sd.doc, doc) ;
             String entity = doc.get(docDef.getEntityField()) ;
 
             Node literal = null;
             String lexical = doc.get(field) ;
+            String docLang = doc.get(docDef.getLangField()) ;
+            String effectiveField = useDocLang ? field + "_" + docLang : field;
+            log.trace("highlightResults[{}]: {}, field: {}, lexical: {}, docLang: {}, effectiveField: {}", sd.doc, doc, field, lexical, docLang, effectiveField) ;
             if (lexical != null) {
-                String docLang = doc.get(docDef.getLangField()) ;
-                TokenStream tokenStream = analyzer.tokenStream(field, lexical);
+                TokenStream tokenStream = queryAnalyzer.tokenStream(effectiveField, lexical);
                 TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, lexical, opts.joinFrags, opts.maxFrags);
                 String rez = frags2string(frags, opts);
                 
@@ -549,56 +568,81 @@ public class TextIndexLucene implements TextIndex {
         }
         return results ;
     }
+    
+    private Map<String, Analyzer> multilingualQueryAnalyzers = new HashMap<>();
+    
+    private Analyzer getQueryAnalyzer(boolean usingSearchFor, String lang) {
+        if (usingSearchFor) {
+            Analyzer qa = multilingualQueryAnalyzers.get(lang);
+            if (qa == null) {
+                qa = new PerFieldAnalyzerWrapper(new QueryMultilingualAnalyzer(defaultAnalyzer, lang), analyzerPerField);
+                multilingualQueryAnalyzers.put(lang, qa);
+            }
+            return qa;
+        } else {
+            return queryAnalyzer;
+        }
+    }
 
     private List<TextHit> query$(IndexReader indexReader, Node property, String qs, String graphURI, String lang, int limit, String highlight)
             throws ParseException, IOException, InvalidTokenOffsetsException {
-        String textField = docDef.getField(property);
-        String textClause;
-        String langClause = null;
-        String graphClause = null;
-
-        //for language-based search extension
-        if (getDocDef().getLangField() != null) {
-            String langField = getDocDef().getLangField();
-            if (StringUtils.isNotEmpty(lang)) {
-                if (this.isMultilingual && !lang.equals("none")) {
-                    textField = (textField == null ? docDef.getPrimaryField() : textField)  + "_" + lang;
-                }
-                langClause = !"none".equals(lang)?
-                        langField + ":" + lang : "-" + langField + ":*";
+        String textField = docDef.getField(property) != null ?  docDef.getField(property) : docDef.getPrimaryField();
+        String textClause = "";               
+        String langField = getDocDef().getLangField();
+        
+        List<String> searchForTags = Util.getSearchForTags(lang);
+        boolean usingSearchFor = !searchForTags.isEmpty();
+        if (usingSearchFor) {            
+            for (String tag : searchForTags) {
+                String tf = textField + "_" + tag;
+                textClause += tf + ":" + qs + " ";
             }
+        } else {
+            if (this.isMultilingual && StringUtils.isNotEmpty(lang) && !lang.equals("none")) {
+                textField += "_" + lang;
+            }
+            
+            if (docDef.getField(property) != null) {
+                textClause = textField + ":" + qs;
+            } else {
+                textClause = qs;
+            }
+           
+            String langClause = null;
+            if (langField != null) {
+                langClause = StringUtils.isNotEmpty(lang) ? (!lang.equals("none") ? langField + ":" + lang : "-" + langField + ":*") : null;
+            }
+            if (langClause != null)
+                textClause = "(" + textClause + ") AND " + langClause ;
         }
-
-        if (textField != null)
-            textClause = textField + ":" + qs ;
-        else
-            textClause = qs ;
         
-        String effectiveField = (textField != null) ? textField : docDef.getPrimaryField();
-
+        String graphClause = null;
         if (graphURI != null) {
             String escaped = QueryParserBase.escape(graphURI) ;
             graphClause = getDocDef().getGraphField() + ":" + escaped ;
         }
-
+        
         String queryString = textClause ;
-        if (langClause != null)
-            queryString = "(" + queryString + ") AND " + langClause ;
+
         if (graphClause != null)
             queryString = "(" + queryString + ") AND " + graphClause ;
+        
+        Analyzer qa = getQueryAnalyzer(usingSearchFor, lang);
+        Query query = parseQuery(queryString, qa) ;
+        
+        if ( limit <= 0 )
+            limit = MAX_N ;
 
-        log.debug("Lucene query: {} ({})", queryString, limit) ;
+        log.debug("Lucene queryString: {}, parsed query: {}, limit:{}", queryString, query, limit) ;
 
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
-        Query query = parseQuery(queryString, queryAnalyzer) ;
-        if ( limit <= 0 )
-            limit = MAX_N ;
+
         ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
         
         if (highlight != null) {
-            return highlightResults(sDocs, indexSearcher, query, effectiveField, highlight);
+            return highlightResults(sDocs, indexSearcher, query, textField, highlight, usingSearchFor);
         } else {
-            return simpleResults(sDocs, indexSearcher, query, effectiveField);
+            return simpleResults(sDocs, indexSearcher, query, textField);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
new file mode 100644
index 0000000..9f3b890
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer ;
+
+import org.apache.lucene.analysis.Analyzer ;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/** 
+ * Lucene Analyzer implementation that delegates to a language-specific
+ * Analyzer based on a field name suffix: e.g. field="label_en" will use the
+ * index analyzer added for "en" if any, otherwise an EnglishAnalyzer.
+ */
+
+public class IndexingMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
+        private static Logger log = LoggerFactory.getLogger(IndexingMultilingualAnalyzer.class);
+        
+        private Analyzer defaultAnalyzer;
+
+        public IndexingMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+                super(PER_FIELD_REUSE_STRATEGY);
+                this.defaultAnalyzer = defaultAnalyzer;
+        }
+
+        @Override
+        protected Analyzer getWrappedAnalyzer(String fieldName) {
+                int idx = fieldName.lastIndexOf("_");
+                if (idx == -1) { // not language-specific, e.g. "label"
+                        return defaultAnalyzer;
+                }
+                String lang = fieldName.substring(idx+1);
+                Analyzer analyzer = Util.getIndexAnalyzer(lang);
+                analyzer = analyzer != null ? analyzer : Util.getLocalizedAnalyzer(lang);
+                analyzer = analyzer != null ? analyzer : defaultAnalyzer;
+                log.trace("getWrappedAnalyzer fieldName: {}, analyzer: {}", fieldName, analyzer);
+                return analyzer;
+        }
+
+        @Override
+        public String toString() {
+                return "IndexingMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+        }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
index 1ba21d1..f3fb451 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
@@ -20,6 +20,8 @@ package org.apache.jena.query.text.analyzer ;
 
 import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 /** 
@@ -29,6 +31,7 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
  */
 
 public class MultilingualAnalyzer extends DelegatingAnalyzerWrapper {
+        private static Logger log = LoggerFactory.getLogger(MultilingualAnalyzer.class);
         private Analyzer defaultAnalyzer;
 
         public MultilingualAnalyzer(Analyzer defaultAnalyzer) {
@@ -44,7 +47,9 @@ public class MultilingualAnalyzer extends DelegatingAnalyzerWrapper {
                 }
                 String lang = fieldName.substring(idx+1);
                 Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
-                return (analyzer != null ? analyzer : defaultAnalyzer);
+                analyzer = analyzer != null ? analyzer : defaultAnalyzer;
+                log.trace("getWrappedAnalyzer {}", analyzer);
+                return analyzer;
         }
 
         @Override

http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
new file mode 100644
index 0000000..de35e9e
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer ;
+
+import org.apache.lucene.analysis.Analyzer ;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/** 
+ * Lucene Analyzer implementation that, for suffixed field names, delegates
+ * to the language-specific Analyzer for the tag given at construction or, if
+ * none was given, for the suffix: e.g. field="label_en" uses an EnglishAnalyzer.
+ */
+
+public class QueryMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
+        private static Logger log = LoggerFactory.getLogger(QueryMultilingualAnalyzer.class);
+        private Analyzer defaultAnalyzer;
+        private String langTag;
+
+        public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+                super(PER_FIELD_REUSE_STRATEGY);
+                this.defaultAnalyzer = defaultAnalyzer;
+                this.langTag = null;
+        }
+
+        public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer, String tag) {
+                super(PER_FIELD_REUSE_STRATEGY);
+                this.defaultAnalyzer = defaultAnalyzer;
+                this.langTag = tag;
+        }
+
+        @Override
+        /**
+         * The langTag supplied at instantiation is used to retrieve the
+         * analyzer, regardless of the tag on the fieldName. If no langTag
+         * was supplied then the tag on fieldName is used to retrieve the
+         * analyzer, as with the MultilingualAnalyzer.
+         * 
+         * @param fieldName
+         * @return the analyzer to use in the search
+         */
+        protected Analyzer getWrappedAnalyzer(String fieldName) {
+                int idx = fieldName.lastIndexOf("_");
+                if (idx == -1) { // not language-specific, e.g. "label"
+                        return defaultAnalyzer;
+                }
+                String lang = langTag != null ? langTag : fieldName.substring(idx+1);
+                Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
+                analyzer = analyzer != null ? analyzer : defaultAnalyzer;
+                log.trace("getWrappedAnalyzer langTag: {}, fieldName: {}, analyzer: {}", langTag, fieldName, analyzer);
+                return analyzer;
+        }
+
+        @Override
+        public String toString() {
+                return "QueryMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+        }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 6ad0747..b41baa1 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -18,18 +18,30 @@
 
 package org.apache.jena.query.text.analyzer;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.lucene.analysis.Analyzer;
 import java.lang.reflect.Constructor;
+import java.util.ArrayList;
 import java.util.Hashtable;
+import java.util.List;
 
 public class Util {
 
-    private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
-    private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+    private static Hashtable<String, Class<?>> analyzersClasses; //mapping between BCP-47 language tags and lucene analyzersClasses
+    private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multiple analyzer instantiations
     
     // cache of defined text:defineAnalyzers
     private static Hashtable<String, Analyzer> definedAnalyzers = new Hashtable<>();
+    
+    // cache of defined text:indexAnalyzers
+    private static Hashtable<String, Analyzer> indexAnalyzers = new Hashtable<>();
+    
+    // cache of text:searchFor language tags
+    private static Hashtable<String, List<String>> searchForTags = new Hashtable<>();
+    
+    // map of auxiliary index info
+    private static Hashtable<String, List<String>> auxIndexes = new Hashtable<>();
 
     static {
         initAnalyzerDefs();
@@ -67,6 +79,41 @@ public class Util {
     public static void defineAnalyzer(Resource key, Analyzer analyzer) {
         definedAnalyzers.put(key.getURI(), analyzer);
     }
+    
+    public static Analyzer getIndexAnalyzer(String tag) {
+        return indexAnalyzers.get(tag);
+    }
+    
+    public static void addIndexAnalyzer(String tag, Analyzer analyzer) {
+        indexAnalyzers.put(tag, analyzer);
+    }
+    
+    public static boolean usingIndexAnalyzers() {
+        return !indexAnalyzers.isEmpty();
+    }
+    
+    public static List<String> getSearchForTags(String tag) {
+        List<String> tags = new ArrayList<>();
+        if (StringUtils.isNotEmpty(tag)) {
+            List<String> x = searchForTags.get(tag);
+            if (x != null) {
+                tags = x;
+            }
+        }
+        return tags;
+    }
+    
+    public static void addSearchForTags(String tag, List<String> tags) {
+        searchForTags.put(tag, tags);
+    }
+    
+    public static List<String> getAuxIndexes(String tag) {
+        return StringUtils.isNotEmpty(tag) ? auxIndexes.get(tag) : new ArrayList<>();
+    }
+    
+    public static void addAuxIndexes(String tag, List<String> tags) {
+        auxIndexes.put(tag, tags);
+    }
 
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();

http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
index 6326128..6977f13 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -18,6 +18,9 @@
 
 package org.apache.jena.query.text.assembler;
 
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.jena.assembler.Assembler;
 import org.apache.jena.query.text.TextIndexException;
 import org.apache.jena.query.text.analyzer.Util;
@@ -26,6 +29,8 @@ import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.Statement;
 import org.apache.jena.vocabulary.RDF;
 import org.apache.lucene.analysis.Analyzer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class DefineAnalyzersAssembler {
     /*
@@ -39,7 +44,46 @@ public class DefineAnalyzersAssembler {
              text:analyzer [ . . . ]]
         )
     */
+    private static Logger          log      = LoggerFactory.getLogger(DefineAnalyzersAssembler.class) ;
+
+    private static List<String> getStringList(Statement stmt, String p) {
+        List<String> tags = new ArrayList<String>();
+        RDFNode aNode = stmt.getObject();
+        if (! aNode.isResource()) {
+            throw new TextIndexException(p + " property is not a list : " + aNode);
+        }
 
+        Resource current = (Resource) aNode;
+        while (current != null && ! current.equals(RDF.nil)) {
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException(p + " list not well formed: " + current);
+            }
+
+            RDFNode first = firstStmt.getObject();
+            if (! first.isLiteral()) {
+                throw new TextIndexException(p + " list not a String : " + first);
+            }
+
+            String tag = first.toString();
+            tags.add(tag);
+            
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException(p + " list not terminated by rdf:nil");
+            }
+            
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException(p + " list rest node is not a resource : " + rest);
+            }
+            
+            current = (Resource) rest;
+        }
+       
+        return tags;
+    }
+   
     public static boolean open(Assembler a, Resource list) {
         Resource current = list;
         boolean isMultilingualSupport = false;
@@ -67,13 +111,6 @@ public class DefineAnalyzersAssembler {
                 // calls GenericAnalyzerAssembler
                 Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
                 
-                if (adding.hasProperty(TextVocab.pAddLang)) {
-                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
-                    String langCode = langStmt.getString();
-                    Util.addAnalyzer(langCode, analyzer);
-                    isMultilingualSupport = true;
-                }
-                
                 if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
                     Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
                     Resource id = defStmt.getResource();
@@ -84,6 +121,37 @@ public class DefineAnalyzersAssembler {
                         throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
                     }
                 }
+                
+                String langCode = null;
+                
+                if (adding.hasProperty(TextVocab.pAddLang)) {
+                    Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+                    langCode = langStmt.getString();
+                    Util.addAnalyzer(langCode, analyzer);
+                    isMultilingualSupport = true;
+                }
+                
+                if (langCode != null && adding.hasProperty(TextVocab.pSearchFor)) {
+                    Statement searchForStmt = adding.getProperty(TextVocab.pSearchFor);
+                    List<String> tags = getStringList(searchForStmt, "text:searchFor");
+                    Util.addSearchForTags(langCode, tags);
+                }
+                
+                if (langCode != null && adding.hasProperty(TextVocab.pAuxIndex)) {
+                    Statement searchForStmt = adding.getProperty(TextVocab.pAuxIndex);
+                    List<String> tags = getStringList(searchForStmt, "text:auxIndex");
+                    Util.addAuxIndexes(langCode, tags);
+                    log.trace("addAuxIndexes for {} with tags: {}", langCode, tags);
+                }
+                
+                
+                if (adding.hasProperty(TextVocab.pIndexAnalyzer)) {
+                    Statement indexStmt = adding.getProperty(TextVocab.pIndexAnalyzer);
+                    Resource key = indexStmt.getResource();
+                    Analyzer indexer = Util.getDefinedAnalyzer(key);
+                    Util.addIndexAnalyzer(langCode, indexer);
+                    log.trace("addIndexAnalyzer lang: {} with analyzer: {}", langCode, indexer);
+                }
             }
             
             Statement restStmt = current.getProperty(RDF.rest);

http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 187715a4..973a3a4 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -108,6 +108,10 @@ public class TextVocab
     public static final Property pDefTokenizer      = Vocab.property(NS, "defineTokenizer");
     public static final Property pAddLang           = Vocab.property(NS, "addLang");
     public static final Property pUseAnalyzer       = Vocab.property(NS, "useAnalyzer");
+    public static final Property pSearchFor         = Vocab.property(NS, "searchFor");
+    public static final Property pAuxIndex          = Vocab.property(NS, "auxIndex");
+    public static final Property pIndexAnalyzer     = Vocab.property(NS, "indexAnalyzer");
+    public static final Property indexAnalyzer      = Vocab.property(NS, "IndexAnalyzer");
     
     // Query Cache
     public static final Property pCacheQueries      = Vocab.property(NS, "cacheQueries");


[5/6] jena git commit: added auxIndex unit test

Posted by co...@apache.org.
added auxIndex unit test

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/28f94076
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/28f94076
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/28f94076

Branch: refs/heads/master
Commit: 28f9407600b6c6fe8363c23015ca141021404578
Parents: e4c2b91
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Tue Jun 19 09:25:06 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Tue Jun 19 09:25:06 2018 -0500

----------------------------------------------------------------------
 .../text/TestTextMultilingualEnhancements.java  | 88 +++++++++++++++++++-
 1 file changed, 84 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/28f94076/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
index c9e7f05..8170408 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
@@ -85,7 +85,15 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
                     "    text:directory \"mem\" ;",
                     "    text:storeValues true ;",
                     "    text:entityMap :entMap ;",
+                    "    text:multilingualSupport true ;", 
                     "    text:defineAnalyzers (",
+                    "      [ text:defineAnalyzer :folding ;",
+                    "        text:analyzer [",
+                    "          a text:ConfigurableAnalyzer ;",
+                    "          text:tokenizer text:StandardTokenizer ;",
+                    "          text:filters (text:LowerCaseFilter text:ASCIIFoldingFilter) ;",
+                    "        ]",
+                    "      ]",
                     "      [ text:addLang \"en-01\" ;",
                     "        text:searchFor ( \"en-01\" \"en-02\" ) ;",
                     "        text:analyzer [ a text:StandardAnalyzer ]",
@@ -100,6 +108,17 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
                     "      [ text:addLang \"en-04\" ;",
                     "        text:analyzer [ a text:StandardAnalyzer ]",
                     "      ]",
+                    "      [ text:addLang \"en-05\" ;",
+                    "        text:searchFor ( \"en-05\" \"en-aux\" ) ;",
+                    "        text:analyzer [ a text:StandardAnalyzer ]",
+                    "      ]",
+                    "      [ text:addLang \"en-aux\" ;",
+                    "        text:searchFor ( \"en-05\" \"en-aux\" ) ;",
+                    "        text:analyzer [ ",
+                    "          a text:DefinedAnalyzer ; ",
+                    "          text:useAnalyzer :folding",
+                    "        ]",
+                    "      ]",
                     "    ) ;",
                     "    .",
                     "",
@@ -160,6 +179,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
                 assertNotNull(literal);
                 literals.put(entityUri, literal);
             }
+            System.err.println("Query: " + queryString + ", COUNT: " + count + ", Expected; " + expectedEntityURIs.size());
             assertEquals(expectedEntityURIs.size(), count);
         }
         finally {
@@ -169,7 +189,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
     }
 
     @Test
-    public void testTextMultilingualFeatures1() {
+    public void testTextSearchFor1() {
         final String turtleA = StrUtils.strjoinNL(
                 TURTLE_PROLOG,
                 "<" + RESOURCE_BASE + "testResultOneInModelA>",
@@ -184,7 +204,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
                 QUERY_PROLOG,
                 "SELECT ?s ?lit",
                 "WHERE {",
-                "  (?s ?sc ?lit ?g) text:query ( \"green\" ) . ",
+                "  (?s ?sc ?lit ?g) text:query ( \"green\"@en-02 ) . ",
                 "}"
                 );
         Set<String> expectedURIs = new HashSet<>() ;
@@ -200,7 +220,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
     }
 
     @Test
-    public void testTextMultilingualFeatures2() {
+    public void testTextSearchFor2() {
         final String turtleA = StrUtils.strjoinNL(
                 TURTLE_PROLOG,
                 "<" + RESOURCE_BASE + "testResultOneInModelA>",
@@ -215,7 +235,36 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
                 QUERY_PROLOG,
                 "SELECT ?s ?lit",
                 "WHERE {",
-                "  (?s ?sc ?lit ?g) text:query ( \"flower\" ) . ",
+                "  (?s ?sc ?lit ?g) text:query ( \"flower\"@en-01 ) . ",
+                "}"
+                );
+        Set<String> expectedURIs = new HashSet<>() ;
+        expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA")) ;
+        
+        Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+        assertEquals(1, literals.size());
+        
+        Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+        assertNotNull(value);
+    }
+
+    @Test
+    public void testTextSimple1() {
+        final String turtleA = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testResultOneInModelA>",
+                "  rdfs:label \"one green flower\"@en-03",
+                ".",
+                "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+                "  rdfs:label \"two green flowers\"@en-04",
+                "."
+                );
+        putTurtleInModel(turtleA, "http://example.org/modelA") ;
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?lit",
+                "WHERE {",
+                "  (?s ?sc ?lit ?g) text:query ( \"green\"@en-03 ) . ",
                 "}"
                 );
         Set<String> expectedURIs = new HashSet<>() ;
@@ -227,4 +276,35 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
         Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
         assertNotNull(value);
     }
+
+    @Test
+    public void testTextAux1() {
+        final String turtleA = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testResultOneInModelA>",
+                "  rdfs:label \"one Green flower\"@en-05",
+                ".",
+                "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+                "  rdfs:label \"two gReeN flowers\"@en-05",
+                "."
+                );
+        putTurtleInModel(turtleA, "http://example.org/modelA") ;
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?lit",
+                "WHERE {",
+                "  (?s ?sc ?lit ?g) text:query ( \"green\"@en-aux ) . ",
+                "}"
+                );
+        Set<String> expectedURIs = new HashSet<>() ;
+        expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA", RESOURCE_BASE + "testResultTwoInModelA")) ;
+        
+        Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+        assertEquals(2, literals.size());
+        
+        Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+        assertNotNull(value);
+        value = literals.get(RESOURCE_BASE + "testResultTwoInModelA");
+        assertNotNull(value);
+    }
 }


[4/6] jena git commit: added searchFor unit tests

Posted by co...@apache.org.
added searchFor unit tests

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/e4c2b918
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/e4c2b918
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/e4c2b918

Branch: refs/heads/master
Commit: e4c2b918c5db86de8c0176d5f5725d570e96a946
Parents: fee0151
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Fri Jun 15 14:49:18 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Fri Jun 15 14:49:18 2018 -0500

----------------------------------------------------------------------
 .../org/apache/jena/query/text/TS_Text.java     |   1 +
 .../text/TestTextMultilingualEnhancements.java  | 230 +++++++++++++++++++
 2 files changed, 231 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/e4c2b918/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 362a578..c1f7037 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -59,6 +59,7 @@ import org.junit.runners.Suite.SuiteClasses;
     , TestTextGraphIndexExtra2.class
     , TestTextHighlighting.class
     , TestTextDefineAnalyzers.class
+    , TestTextMultilingualEnhancements.class
 })
 
 public class TS_Text

http://git-wip-us.apache.org/repos/asf/jena/blob/e4c2b918/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
new file mode 100644
index 0000000..c9e7f05
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.Reader ;
+import java.io.StringReader ;
+import java.util.Arrays ;
+import java.util.HashMap ;
+import java.util.HashSet ;
+import java.util.Map ;
+import java.util.Set ;
+
+import org.apache.jena.assembler.Assembler ;
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.query.Dataset ;
+import org.apache.jena.query.Query ;
+import org.apache.jena.query.QueryExecution ;
+import org.apache.jena.query.QueryExecutionFactory ;
+import org.apache.jena.query.QueryFactory ;
+import org.apache.jena.query.QuerySolution ;
+import org.apache.jena.query.ReadWrite ;
+import org.apache.jena.query.ResultSet ;
+import org.apache.jena.query.text.assembler.TextAssembler ;
+import org.apache.jena.rdf.model.Literal;
+import org.apache.jena.rdf.model.Model ;
+import org.apache.jena.rdf.model.ModelFactory ;
+import org.apache.jena.rdf.model.Resource ;
+import org.junit.After ;
+import org.junit.Before ;
+import org.junit.Test ;
+
+public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTextIndexBase {
+
+    private static final String SPEC_BASE = "http://example.org/spec#";
+    private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+    private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+    private static final String SPEC;
+    static {
+        SPEC = StrUtils.strjoinNL(
+                    "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+                    "prefix ja:   <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+                    "prefix tdb:  <http://jena.hpl.hp.com/2008/tdb#>",
+                    "prefix text: <http://jena.apache.org/text#>",
+                    "prefix :     <" + SPEC_BASE + ">",
+                    "",
+                    "[] ja:loadClass    \"org.apache.jena.query.text.TextQuery\" .",
+                    "text:TextDataset      rdfs:subClassOf   ja:RDFDataset .",
+                    "text:TextIndexLucene  rdfs:subClassOf   text:TextIndex .",
+
+                    ":" + SPEC_ROOT_LOCAL,
+                    "    a              text:TextDataset ;",
+                    "    text:dataset   :dataset ;",
+                    "    text:index     :indexLucene ;",
+                    "    .",
+                    "",
+                    ":dataset",
+                    "    a                     tdb:DatasetTDB ;",
+                    "    tdb:location          \"--mem--\" ;",
+                    "    tdb:unionDefaultGraph true ;",
+                    ".",
+                    "",
+                    ":indexLucene",
+                    "    a text:TextIndexLucene ;",
+                    "    text:directory \"mem\" ;",
+                    "    text:storeValues true ;",
+                    "    text:entityMap :entMap ;",
+                    "    text:defineAnalyzers (",
+                    "      [ text:addLang \"en-01\" ;",
+                    "        text:searchFor ( \"en-01\" \"en-02\" ) ;",
+                    "        text:analyzer [ a text:StandardAnalyzer ]",
+                    "      ]",
+                    "      [ text:addLang \"en-02\" ;",
+                    "        text:searchFor ( \"en-01\" \"en-02\" ) ;",
+                    "        text:analyzer [ a text:StandardAnalyzer ]",
+                    "      ]",
+                    "      [ text:addLang \"en-03\" ;",
+                    "        text:analyzer [ a text:StandardAnalyzer ]",
+                    "      ]",
+                    "      [ text:addLang \"en-04\" ;",
+                    "        text:analyzer [ a text:StandardAnalyzer ]",
+                    "      ]",
+                    "    ) ;",
+                    "    .",
+                    "",
+                    ":entMap",
+                    "    a text:EntityMap ;",
+                    "    text:entityField      \"uri\" ;",
+                    "    text:defaultField     \"label\" ;",
+                    "    text:langField        \"lang\" ;",
+                    "    text:graphField       \"graph\" ;",
+                    "    text:map (",
+                    "         [ text:field \"label\" ; text:predicate rdfs:label ]",
+                    "         [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+                    "         ) ."
+                    );
+    }
+
+    @Before
+    public void before() {
+        Reader reader = new StringReader(SPEC);
+        Model specModel = ModelFactory.createDefaultModel();
+        specModel.read(reader, "", "TURTLE");
+        TextAssembler.init();
+        Resource root = specModel.getResource(SPEC_ROOT_URI);
+        dataset = (Dataset) Assembler.general.open(root);
+    }
+
+    @After
+    public void after() {
+        dataset.close();
+    }
+
+    private void putTurtleInModel(String turtle, String modelName) {
+        Model model = modelName != null ? dataset.getNamedModel(modelName) : dataset.getDefaultModel() ;
+        Reader reader = new StringReader(turtle) ;
+        dataset.begin(ReadWrite.WRITE) ;
+        try {
+            model.read(reader, "", "TURTLE") ;
+            dataset.commit() ;
+        }
+        finally {
+            dataset.end();
+        }
+    }
+
+    protected Map<String,Literal> doTestSearchWithLiterals(String queryString, Set<String> expectedEntityURIs) {
+        Map<String,Literal> literals = new HashMap<>();
+        Query query = QueryFactory.create(queryString) ;
+        dataset.begin(ReadWrite.READ);
+        try(QueryExecution qexec = QueryExecutionFactory.create(query, dataset)) {
+            ResultSet results = qexec.execSelect() ;
+            assertEquals(expectedEntityURIs.size() > 0, results.hasNext());
+            int count;
+            for (count=0; results.hasNext(); count++) {
+                QuerySolution soln = results.nextSolution();
+                String entityUri = soln.getResource("s").getURI();
+                assertTrue(expectedEntityURIs.contains(entityUri));
+                Literal literal = soln.getLiteral("lit");
+                assertNotNull(literal);
+                literals.put(entityUri, literal);
+            }
+            assertEquals(expectedEntityURIs.size(), count);
+        }
+        finally {
+            dataset.end() ;
+        }
+        return literals;
+    }
+
+    @Test
+    public void testTextMultilingualFeatures1() {
+        final String turtleA = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testResultOneInModelA>",
+                "  rdfs:label \"one green flower\"@en-01",
+                ".",
+                "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+                "  rdfs:label \"two green flowers\"@en-02",
+                "."
+                );
+        putTurtleInModel(turtleA, "http://example.org/modelA") ;
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?lit",
+                "WHERE {",
+                "  (?s ?sc ?lit ?g) text:query ( \"green\" ) . ",
+                "}"
+                );
+        Set<String> expectedURIs = new HashSet<>() ;
+        expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA", RESOURCE_BASE + "testResultTwoInModelA")) ;
+        
+        Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+        assertEquals(2, literals.size());
+        
+        Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+        assertNotNull(value);
+        value = literals.get(RESOURCE_BASE + "testResultTwoInModelA");
+        assertNotNull(value);
+    }
+
+    @Test
+    public void testTextMultilingualFeatures2() {
+        final String turtleA = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testResultOneInModelA>",
+                "  rdfs:label \"one green flower\"@en-01",
+                ".",
+                "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+                "  rdfs:label \"two green flowers\"@en-02",
+                "."
+                );
+        putTurtleInModel(turtleA, "http://example.org/modelA") ;
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s ?lit",
+                "WHERE {",
+                "  (?s ?sc ?lit ?g) text:query ( \"flower\" ) . ",
+                "}"
+                );
+        Set<String> expectedURIs = new HashSet<>() ;
+        expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA")) ;
+        
+        Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+        assertEquals(1, literals.size());
+        
+        Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+        assertNotNull(value);
+    }
+}


[2/6] jena git commit: cleanup per comments from afs

Posted by co...@apache.org.
cleanup per comments from afs

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/83492171
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/83492171
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/83492171

Branch: refs/heads/master
Commit: 83492171b8e61e42ebde29293809d2c8a2d80d0a
Parents: 0d07ca9
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Thu Jun 14 08:42:03 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Thu Jun 14 08:42:03 2018 -0500

----------------------------------------------------------------------
 .../src/main/java/org/apache/jena/query/text/TextIndexLucene.java | 1 -
 .../org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java | 1 -
 .../jena/query/text/analyzer/IndexingMultilingualAnalyzer.java    | 1 -
 .../apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java | 1 -
 .../org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java | 1 -
 .../jena/query/text/analyzer/QueryMultilingualAnalyzer.java       | 1 -
 .../jena/query/text/assembler/DefineAnalyzersAssembler.java       | 3 +--
 .../main/java/org/apache/jena/query/text/assembler/TextVocab.java | 1 -
 8 files changed, 1 insertion(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index cd4d63f..0e70688 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -433,7 +433,6 @@ public class TextIndexLucene implements TextIndex {
             throw new TextIndexParseException(qs, ex.getMessage()) ;
         }
         catch (Exception ex) {
-            ex.printStackTrace(); // TEMPORARY 
             throw new TextIndexException("query", ex) ;
         }
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
index 8d54d2c..b5bbeea 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
@@ -40,7 +40,6 @@ import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
-
 /** 
  * Lucene Analyzer implementation that can be configured with different
  * Tokenizer and (optionally) TokenFilter implementations.

http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
index 9f3b890..6faf82a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /** 
  * Lucene Analyzer implementation that delegates to a language-specific
  * Analyzer based on a field name suffix: e.g. field="label_en" will use

http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
index 071569b..63852fd 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
@@ -22,7 +22,6 @@ import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.analysis.core.KeywordTokenizer ;
 import org.apache.lucene.analysis.core.LowerCaseFilter ;
 
-
 /** 
  * Lucene Analyzer implementation that works like KeywordAnalyzer (i.e.
  * doesn't tokenize the input, keeps it as a single token), but forces text

http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
index f3fb451..de67f9a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /** 
  * Lucene Analyzer implementation that delegates to a language-specific
  * Analyzer based on a field name suffix: e.g. field="label_en" will use

http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
index de35e9e..33005cf 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /** 
  * Lucene Analyzer implementation that delegates to a language-specific
  * Analyzer based on a field name suffix: e.g. field="label_en" will use

http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
index 6977f13..c9dc9fe 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -143,8 +143,7 @@ public class DefineAnalyzersAssembler {
                     Util.addAuxIndexes(langCode, tags);
                     log.trace("addAuxIndexes for {} with tags: {}", langCode, tags);
                 }
-                
-                
+                               
                 if (adding.hasProperty(TextVocab.pIndexAnalyzer)) {
                     Statement indexStmt = adding.getProperty(TextVocab.pIndexAnalyzer);
                     Resource key = indexStmt.getResource();

http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 973a3a4..6a2922b 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -111,7 +111,6 @@ public class TextVocab
     public static final Property pSearchFor         = Vocab.property(NS, "searchFor");
     public static final Property pAuxIndex          = Vocab.property(NS, "auxIndex");
     public static final Property pIndexAnalyzer     = Vocab.property(NS, "indexAnalyzer");
-    public static final Property indexAnalyzer      = Vocab.property(NS, "IndexAnalyzer");
     
     // Query Cache
     public static final Property pCacheQueries      = Vocab.property(NS, "cacheQueries");


[6/6] jena git commit: This closes #436 - Merge branch 'Jena-1556-MutilingualEnhancements-3.8.0'

Posted by co...@apache.org.
This closes #436 - Merge branch 'Jena-1556-MutilingualEnhancements-3.8.0'


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/fe9bdefa
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/fe9bdefa
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/fe9bdefa

Branch: refs/heads/master
Commit: fe9bdefa42c84d5235555fa160a5f221e9656944
Parents: 3e999d5 28f9407
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Jun 20 15:58:27 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Jun 20 15:58:27 2018 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/TextIndexLucene.java | 155 ++++++----
 .../text/analyzer/ConfigurableAnalyzer.java     |   1 -
 .../analyzer/IndexingMultilingualAnalyzer.java  |  61 ++++
 .../text/analyzer/LowerCaseKeywordAnalyzer.java |   1 -
 .../text/analyzer/MultilingualAnalyzer.java     |   8 +-
 .../analyzer/QueryMultilingualAnalyzer.java     |  76 +++++
 .../apache/jena/query/text/analyzer/Util.java   |  51 ++-
 .../assembler/DefineAnalyzersAssembler.java     |  81 ++++-
 .../jena/query/text/assembler/TextVocab.java    |   3 +
 .../org/apache/jena/query/text/TS_Text.java     |   1 +
 .../text/TestTextMultilingualEnhancements.java  | 310 +++++++++++++++++++
 11 files changed, 679 insertions(+), 69 deletions(-)
----------------------------------------------------------------------



[3/6] jena git commit: various cleanup per @kinow

Posted by co...@apache.org.
various cleanup per @kinow

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/fee01519
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/fee01519
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/fee01519

Branch: refs/heads/master
Commit: fee015197ccd58002e8af33fe7d39aff767d8a42
Parents: 8349217
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Fri Jun 15 09:07:10 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Fri Jun 15 09:07:10 2018 -0500

----------------------------------------------------------------------
 .../apache/jena/query/text/TextIndexLucene.java |  4 +-
 .../analyzer/IndexingMultilingualAnalyzer.java  | 47 ++++++------
 .../analyzer/QueryMultilingualAnalyzer.java     | 75 ++++++++++----------
 .../apache/jena/query/text/analyzer/Util.java   |  2 +-
 .../assembler/DefineAnalyzersAssembler.java     |  2 +-
 5 files changed, 66 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 0e70688..120e83f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -102,6 +102,8 @@ public class TextIndexLucene implements TextIndex {
     private final String           queryParserType ;
     private final FieldType        ftText ;
     private final boolean          isMultilingual ;
+    
+    private Map<String, Analyzer> multilingualQueryAnalyzers = new HashMap<>();
 
     // The IndexWriter can't be final because we may have to recreate it if rollback() is called.
     // However, it needs to be volatile in case the next write transaction is on a different thread,
@@ -568,8 +570,6 @@ public class TextIndexLucene implements TextIndex {
         return results ;
     }
     
-    private Map<String, Analyzer> multilingualQueryAnalyzers = new HashMap<>();
-    
     private Analyzer getQueryAnalyzer(boolean usingSearchFor, String lang) {
         if (usingSearchFor) {
             Analyzer qa = multilingualQueryAnalyzers.get(lang);

http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
index 6faf82a..3bab0d2 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
@@ -18,6 +18,7 @@
 
 package org.apache.jena.query.text.analyzer ;
 
+import org.apache.commons.lang3.ObjectUtils;
 import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.slf4j.Logger;
@@ -30,31 +31,31 @@ import org.slf4j.LoggerFactory;
  */
 
 public class IndexingMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
-        private static Logger log = LoggerFactory.getLogger(IndexingMultilingualAnalyzer.class);
-        
-        private Analyzer defaultAnalyzer;
+    private static Logger log = LoggerFactory.getLogger(IndexingMultilingualAnalyzer.class);
 
-        public IndexingMultilingualAnalyzer(Analyzer defaultAnalyzer) {
-                super(PER_FIELD_REUSE_STRATEGY);
-                this.defaultAnalyzer = defaultAnalyzer;
-        }
+    private Analyzer defaultAnalyzer;
 
-        @Override
-        protected Analyzer getWrappedAnalyzer(String fieldName) {
-                int idx = fieldName.lastIndexOf("_");
-                if (idx == -1) { // not language-specific, e.g. "label"
-                        return defaultAnalyzer;
-                }
-                String lang = fieldName.substring(idx+1);
-                Analyzer analyzer = Util.getIndexAnalyzer(lang);
-                analyzer = analyzer != null ? analyzer : Util.getLocalizedAnalyzer(lang);
-                analyzer = analyzer != null ? analyzer : defaultAnalyzer;
-                log.trace("getWrappedAnalyzer fieldName: {}, analyzer: {}", fieldName, analyzer);
-                return analyzer;
-        }
+    public IndexingMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+        super(PER_FIELD_REUSE_STRATEGY);
+        this.defaultAnalyzer = defaultAnalyzer;
+    }
 
-        @Override
-        public String toString() {
-                return "IndexingMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+    @Override
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+        int idx = fieldName.lastIndexOf("_");
+        if (idx == -1) { // not language-specific, e.g. "label"
+            return defaultAnalyzer;
         }
+        String lang = fieldName.substring(idx+1);
+        Analyzer analyzer = Util.getIndexAnalyzer(lang);
+        analyzer = ObjectUtils.defaultIfNull(analyzer, Util.getLocalizedAnalyzer(lang));
+        analyzer = ObjectUtils.defaultIfNull(analyzer, defaultAnalyzer);
+        log.trace("getWrappedAnalyzer fieldName: {}, analyzer: {}", fieldName, analyzer);
+        return analyzer;
+    }
+
+    @Override
+    public String toString() {
+        return "IndexingMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+    }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
index 33005cf..de16c7a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
@@ -18,6 +18,7 @@
 
 package org.apache.jena.query.text.analyzer ;
 
+import org.apache.commons.lang3.ObjectUtils;
 import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.slf4j.Logger;
@@ -30,46 +31,46 @@ import org.slf4j.LoggerFactory;
  */
 
 public class QueryMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
-        private static Logger log = LoggerFactory.getLogger(QueryMultilingualAnalyzer.class);
-        private Analyzer defaultAnalyzer;
-        private String langTag;
+    private static Logger log = LoggerFactory.getLogger(QueryMultilingualAnalyzer.class);
+    private Analyzer defaultAnalyzer;
+    private String langTag;
 
-        public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer) {
-                super(PER_FIELD_REUSE_STRATEGY);
-                this.defaultAnalyzer = defaultAnalyzer;
-                this.langTag = null;
-        }
+    public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+        super(PER_FIELD_REUSE_STRATEGY);
+        this.defaultAnalyzer = defaultAnalyzer;
+        this.langTag = null;
+    }
 
-        public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer, String tag) {
-                super(PER_FIELD_REUSE_STRATEGY);
-                this.defaultAnalyzer = defaultAnalyzer;
-                this.langTag = tag;
-        }
+    public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer, String tag) {
+        super(PER_FIELD_REUSE_STRATEGY);
+        this.defaultAnalyzer = defaultAnalyzer;
+        this.langTag = tag;
+    }
 
-        @Override
-        /**
-         * The analyzer corresponding to the langTag supplied at instantiation
-         * is used to retrieve the analyzer to use regardless of the tag on the
-         * fieldName. If no langTag is supplied then the tag on fieldName is
-         * used to retrieve the analyzer as with the MultilingualAnalyzer
-         * 
-         * @param fieldName
-         * @return the analyzer to use in the search
-         */
-        protected Analyzer getWrappedAnalyzer(String fieldName) {
-                int idx = fieldName.lastIndexOf("_");
-                if (idx == -1) { // not language-specific, e.g. "label"
-                        return defaultAnalyzer;
-                }
-                String lang = langTag != null ? langTag : fieldName.substring(idx+1);
-                Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
-                analyzer = analyzer != null ? analyzer : defaultAnalyzer;
-                log.trace("getWrappedAnalyzer langTag: {}, fieldName: {}, analyzer: {}", langTag, fieldName, analyzer);
-                return analyzer;
+    @Override
+    /**
+     * The analyzer is selected using the langTag supplied at instantiation,
+     * regardless of the language tag on the fieldName. If no langTag was
+     * supplied, then the tag on the fieldName is used to select the
+     * analyzer, as with the MultilingualAnalyzer.
+     * 
+     * @param fieldName
+     * @return the analyzer to use in the search
+     */
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+        int idx = fieldName.lastIndexOf("_");
+        if (idx == -1) { // not language-specific, e.g. "label"
+            return defaultAnalyzer;
         }
+        String lang = ObjectUtils.defaultIfNull(langTag, fieldName.substring(idx+1));
+        Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
+        analyzer = ObjectUtils.defaultIfNull(analyzer, defaultAnalyzer);
+        log.trace("getWrappedAnalyzer langTag: {}, fieldName: {}, analyzer: {}", langTag, fieldName, analyzer);
+        return analyzer;
+    }
 
-        @Override
-        public String toString() {
-                return "QueryMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
-        }
+    @Override
+    public String toString() {
+        return "QueryMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+    }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index b41baa1..1e7b85d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -108,7 +108,7 @@ public class Util {
     }
     
     public static List<String> getAuxIndexes(String tag) {
-        return StringUtils.isNotEmpty(tag) ? auxIndexes.get(tag) : new ArrayList<>();
+        return StringUtils.isNotEmpty(tag) ? auxIndexes.get(tag) : null;
     }
     
     public static void addAuxIndexes(String tag, List<String> tags) {

http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
index c9dc9fe..876ca74 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -44,7 +44,7 @@ public class DefineAnalyzersAssembler {
              text:analyzer [ . . . ]]
         )
     */
-    private static Logger          log      = LoggerFactory.getLogger(DefineAnalyzersAssembler.class) ;
+    private static Logger log = LoggerFactory.getLogger(DefineAnalyzersAssembler.class) ;
 
     private static List<String> getStringList(Statement stmt, String p) {
         List<String> tags = new ArrayList<String>();