You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by co...@apache.org on 2018/06/20 20:59:59 UTC
[1/6] jena git commit: JENA-1556 implementation
Repository: jena
Updated Branches:
refs/heads/master 3e999d55d -> fe9bdefa4
JENA-1556 implementation
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0d07ca90
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0d07ca90
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0d07ca90
Branch: refs/heads/master
Commit: 0d07ca904f9495f5a320faf5b7dd761d5f7294ab
Parents: f0b0522
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Jun 13 13:13:15 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Jun 13 13:13:15 2018 -0500
----------------------------------------------------------------------
.../apache/jena/query/text/TextIndexLucene.java | 156 ++++++++++++-------
.../analyzer/IndexingMultilingualAnalyzer.java | 61 ++++++++
.../text/analyzer/MultilingualAnalyzer.java | 7 +-
.../analyzer/QueryMultilingualAnalyzer.java | 76 +++++++++
.../apache/jena/query/text/analyzer/Util.java | 51 +++++-
.../assembler/DefineAnalyzersAssembler.java | 82 +++++++++-
.../jena/query/text/assembler/TextVocab.java | 4 +
7 files changed, 371 insertions(+), 66 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 3eacfbe..cd4d63f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -31,7 +31,10 @@ import org.apache.jena.datatypes.TypeMapper ;
import org.apache.jena.datatypes.xsd.XSDDatatype ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
+import org.apache.jena.query.text.analyzer.IndexingMultilingualAnalyzer;
import org.apache.jena.query.text.analyzer.MultilingualAnalyzer;
+import org.apache.jena.query.text.analyzer.QueryMultilingualAnalyzer;
+import org.apache.jena.query.text.analyzer.Util;
import org.apache.jena.sparql.util.NodeFactoryExtra ;
import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.TokenStream;
@@ -85,14 +88,16 @@ public class TextIndexLucene implements TextIndex {
ftIRI = new FieldType() ;
ftIRI.setTokenized(false) ;
ftIRI.setStored(true) ;
- ftIRI.setIndexOptions(IndexOptions.DOCS);
+ ftIRI.setIndexOptions(IndexOptions.DOCS);
ftIRI.freeze() ;
}
public static final FieldType ftString = StringField.TYPE_NOT_STORED ;
private final EntityDefinition docDef ;
private final Directory directory ;
- private final Analyzer analyzer ;
+ private final Analyzer indexAnalyzer ;
+ private Analyzer defaultAnalyzer ;
+ private Map<String, Analyzer> analyzerPerField;
private final Analyzer queryAnalyzer ;
private final String queryParserType ;
private final FieldType ftText ;
@@ -122,7 +127,7 @@ public class TextIndexLucene implements TextIndex {
// create the analyzer as a wrapper that uses KeywordAnalyzer for
// entity and graph fields and the configured analyzer(s) for all other
- Map<String, Analyzer> analyzerPerField = new HashMap<>() ;
+ analyzerPerField = new HashMap<>() ;
analyzerPerField.put(docDef.getEntityField(), new KeywordAnalyzer()) ;
if ( docDef.getGraphField() != null )
analyzerPerField.put(docDef.getGraphField(), new KeywordAnalyzer()) ;
@@ -136,12 +141,17 @@ public class TextIndexLucene implements TextIndex {
}
}
- Analyzer defaultAnalyzer = (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer();
- if (this.isMultilingual)
- defaultAnalyzer = new MultilingualAnalyzer(defaultAnalyzer);
- this.analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerPerField) ;
- this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
+ defaultAnalyzer = (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer();
+ Analyzer indexDefault = defaultAnalyzer;
+ Analyzer queryDefault = defaultAnalyzer;
+ if (this.isMultilingual) {
+ queryDefault = new MultilingualAnalyzer(defaultAnalyzer);
+ indexDefault = Util.usingIndexAnalyzers() ? new IndexingMultilingualAnalyzer(defaultAnalyzer) : queryDefault;
+ }
+ this.indexAnalyzer = new PerFieldAnalyzerWrapper(indexDefault, analyzerPerField) ;
+ this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : new PerFieldAnalyzerWrapper(queryDefault, analyzerPerField) ;
this.queryParserType = config.getQueryParser() ;
+ log.debug("TextIndexLucene defaultAnalyzer: {}, indexAnalyzer: {}, queryAnalyzer: {}, queryParserType: {}", defaultAnalyzer, indexAnalyzer, queryAnalyzer, queryParserType);
this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ;
if (config.isValueStored() && docDef.getLangField() == null)
log.warn("Values stored but langField not set. Returned values will not have language tag or datatype.");
@@ -150,7 +160,7 @@ public class TextIndexLucene implements TextIndex {
}
private void openIndexWriter() {
- IndexWriterConfig wConfig = new IndexWriterConfig(analyzer) ;
+ IndexWriterConfig wConfig = new IndexWriterConfig(indexAnalyzer) ;
try
{
indexWriter = new IndexWriter(directory, wConfig) ;
@@ -158,12 +168,12 @@ public class TextIndexLucene implements TextIndex {
indexWriter.commit();
}
catch (IndexFormatTooOldException e) {
- throw new TextIndexException("jena-text/Lucene cannot use indexes created before Jena 3.3.0. "
- + "Please rebuild your text index using jena.textindexer from Jena 3.3.0 or above.", e);
+ throw new TextIndexException("jena-text/Lucene cannot use indexes created before Jena 3.3.0. "
+ + "Please rebuild your text index using jena.textindexer from Jena 3.3.0 or above.", e);
}
catch (IOException e)
{
- throw new TextIndexException(e) ;
+ throw new TextIndexException("openIndexWriter", e) ;
}
}
@@ -172,7 +182,7 @@ public class TextIndexLucene implements TextIndex {
}
public Analyzer getAnalyzer() {
- return analyzer ;
+ return indexAnalyzer ;
}
public Analyzer getQueryAnalyzer() {
@@ -189,7 +199,7 @@ public class TextIndexLucene implements TextIndex {
indexWriter.prepareCommit();
}
catch (IOException e) {
- throw new TextIndexException(e);
+ throw new TextIndexException("prepareCommit", e);
}
}
@@ -199,7 +209,7 @@ public class TextIndexLucene implements TextIndex {
indexWriter.commit();
}
catch (IOException e) {
- throw new TextIndexException(e);
+ throw new TextIndexException("commit", e);
}
}
@@ -211,7 +221,7 @@ public class TextIndexLucene implements TextIndex {
idx.rollback();
}
catch (IOException e) {
- throw new TextIndexException(e);
+ throw new TextIndexException("rollback", e);
}
// The rollback will close the indexWriter, so we need to reopen it
@@ -224,7 +234,7 @@ public class TextIndexLucene implements TextIndex {
indexWriter.close() ;
}
catch (IOException ex) {
- throw new TextIndexException(ex) ;
+ throw new TextIndexException("close", ex) ;
}
}
@@ -237,7 +247,7 @@ public class TextIndexLucene implements TextIndex {
try {
updateDocument(entity);
} catch (IOException e) {
- throw new TextIndexException(e) ;
+ throw new TextIndexException("updateEntity", e) ;
}
}
@@ -259,7 +269,7 @@ public class TextIndexLucene implements TextIndex {
addDocument(entity);
}
catch (IOException e) {
- throw new TextIndexException(e) ;
+ throw new TextIndexException("addEntity", e) ;
}
}
@@ -288,7 +298,7 @@ public class TextIndexLucene implements TextIndex {
indexWriter.deleteDocuments(uid);
} catch (Exception e) {
- throw new TextIndexException(e) ;
+ throw new TextIndexException("deleteEntity", e) ;
}
}
@@ -316,6 +326,13 @@ public class TextIndexLucene implements TextIndex {
if (this.isMultilingual) {
// add a field that uses a language-specific analyzer via MultilingualAnalyzer
doc.add(new Field(e.getKey() + "_" + lang, (String) e.getValue(), ftText));
+ // add fields for any defined auxiliary indexes
+ List<String> auxIndexes = Util.getAuxIndexes(lang);
+ if (auxIndexes != null) {
+ for (String auxTag : auxIndexes) {
+ doc.add(new Field(e.getKey() + "_" + auxTag, (String) e.getValue(), ftText));
+ }
+ }
}
} else if (datatype != null && !datatype.equals(XSDDatatype.XSDstring)) {
// for non-string and non-langString datatypes, store the datatype in langField
@@ -342,7 +359,7 @@ public class TextIndexLucene implements TextIndex {
return x.get(0) ;
}
catch (Exception ex) {
- throw new TextIndexException(ex) ;
+ throw new TextIndexException("get", ex) ;
}
}
@@ -416,7 +433,8 @@ public class TextIndexLucene implements TextIndex {
throw new TextIndexParseException(qs, ex.getMessage()) ;
}
catch (Exception ex) {
- throw new TextIndexException(ex) ;
+ ex.printStackTrace(); // TEMPORARY
+ throw new TextIndexException("query", ex) ;
}
}
@@ -514,7 +532,7 @@ public class TextIndexLucene implements TextIndex {
return rez;
}
- private List<TextHit> highlightResults(ScoreDoc[] sDocs, IndexSearcher indexSearcher, Query query, String field, String highlight)
+ private List<TextHit> highlightResults(ScoreDoc[] sDocs, IndexSearcher indexSearcher, Query query, String field, String highlight, boolean useDocLang)
throws IOException, InvalidTokenOffsetsException {
List<TextHit> results = new ArrayList<>() ;
@@ -526,14 +544,15 @@ public class TextIndexLucene implements TextIndex {
for ( ScoreDoc sd : sDocs ) {
Document doc = indexSearcher.doc(sd.doc) ;
- log.trace("highlightResults[{}]: {}", sd.doc, doc) ;
String entity = doc.get(docDef.getEntityField()) ;
Node literal = null;
String lexical = doc.get(field) ;
+ String docLang = doc.get(docDef.getLangField()) ;
+ String effectiveField = useDocLang ? field + "_" + docLang : field;
+ log.trace("highlightResults[{}]: {}, field: {}, lexical: {}, docLang: {}, effectiveField: {}", sd.doc, doc, field, lexical, docLang, effectiveField) ;
if (lexical != null) {
- String docLang = doc.get(docDef.getLangField()) ;
- TokenStream tokenStream = analyzer.tokenStream(field, lexical);
+ TokenStream tokenStream = queryAnalyzer.tokenStream(effectiveField, lexical);
TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, lexical, opts.joinFrags, opts.maxFrags);
String rez = frags2string(frags, opts);
@@ -549,56 +568,81 @@ public class TextIndexLucene implements TextIndex {
}
return results ;
}
+
+ private Map<String, Analyzer> multilingualQueryAnalyzers = new HashMap<>();
+
+    /**
+     * Selects the analyzer used to parse a query string.
+     * <p>
+     * When the query is not being expanded over text:searchFor tags the
+     * index-wide {@code queryAnalyzer} is returned. Otherwise a per-field
+     * wrapper around a {@link QueryMultilingualAnalyzer} bound to {@code lang}
+     * is created on first use and remembered in
+     * {@code multilingualQueryAnalyzers}, keyed by {@code lang}.
+     *
+     * @param usingSearchFor whether searchFor tags were found for the query language
+     * @param lang the language tag of the query, used as the cache key
+     * @return the analyzer to hand to the query parser
+     */
+    private Analyzer getQueryAnalyzer(boolean usingSearchFor, String lang) {
+        if (!usingSearchFor) {
+            return queryAnalyzer;
+        }
+
+        Analyzer cached = multilingualQueryAnalyzers.get(lang);
+        if (cached != null) {
+            return cached;
+        }
+
+        Analyzer perLanguage = new QueryMultilingualAnalyzer(defaultAnalyzer, lang);
+        Analyzer wrapped = new PerFieldAnalyzerWrapper(perLanguage, analyzerPerField);
+        multilingualQueryAnalyzers.put(lang, wrapped);
+        return wrapped;
+    }
private List<TextHit> query$(IndexReader indexReader, Node property, String qs, String graphURI, String lang, int limit, String highlight)
throws ParseException, IOException, InvalidTokenOffsetsException {
- String textField = docDef.getField(property);
- String textClause;
- String langClause = null;
- String graphClause = null;
-
- //for language-based search extension
- if (getDocDef().getLangField() != null) {
- String langField = getDocDef().getLangField();
- if (StringUtils.isNotEmpty(lang)) {
- if (this.isMultilingual && !lang.equals("none")) {
- textField = (textField == null ? docDef.getPrimaryField() : textField) + "_" + lang;
- }
- langClause = !"none".equals(lang)?
- langField + ":" + lang : "-" + langField + ":*";
+ String textField = docDef.getField(property) != null ? docDef.getField(property) : docDef.getPrimaryField();
+ String textClause = "";
+ String langField = getDocDef().getLangField();
+
+ List<String> searchForTags = Util.getSearchForTags(lang);
+ boolean usingSearchFor = !searchForTags.isEmpty();
+ if (usingSearchFor) {
+ for (String tag : searchForTags) {
+ String tf = textField + "_" + tag;
+ textClause += tf + ":" + qs + " ";
}
+ } else {
+ if (this.isMultilingual && StringUtils.isNotEmpty(lang) && !lang.equals("none")) {
+ textField += "_" + lang;
+ }
+
+ if (docDef.getField(property) != null) {
+ textClause = textField + ":" + qs;
+ } else {
+ textClause = qs;
+ }
+
+ String langClause = null;
+ if (langField != null) {
+ langClause = StringUtils.isNotEmpty(lang) ? (!lang.equals("none") ? langField + ":" + lang : "-" + langField + ":*") : null;
+ }
+ if (langClause != null)
+ textClause = "(" + textClause + ") AND " + langClause ;
}
-
- if (textField != null)
- textClause = textField + ":" + qs ;
- else
- textClause = qs ;
- String effectiveField = (textField != null) ? textField : docDef.getPrimaryField();
-
+ String graphClause = null;
if (graphURI != null) {
String escaped = QueryParserBase.escape(graphURI) ;
graphClause = getDocDef().getGraphField() + ":" + escaped ;
}
-
+
String queryString = textClause ;
- if (langClause != null)
- queryString = "(" + queryString + ") AND " + langClause ;
+
if (graphClause != null)
queryString = "(" + queryString + ") AND " + graphClause ;
+
+ Analyzer qa = getQueryAnalyzer(usingSearchFor, lang);
+ Query query = parseQuery(queryString, qa) ;
+
+ if ( limit <= 0 )
+ limit = MAX_N ;
- log.debug("Lucene query: {} ({})", queryString, limit) ;
+ log.debug("Lucene queryString: {}, parsed query: {}, limit:{}", queryString, query, limit) ;
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
- Query query = parseQuery(queryString, queryAnalyzer) ;
- if ( limit <= 0 )
- limit = MAX_N ;
+
ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
if (highlight != null) {
- return highlightResults(sDocs, indexSearcher, query, effectiveField, highlight);
+ return highlightResults(sDocs, indexSearcher, query, textField, highlight, usingSearchFor);
} else {
- return simpleResults(sDocs, indexSearcher, query, effectiveField);
+ return simpleResults(sDocs, indexSearcher, query, textField);
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
new file mode 100644
index 0000000..9f3b890
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer ;
+
+import org.apache.lucene.analysis.Analyzer ;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Lucene Analyzer used when indexing that delegates to a language-specific
+ * Analyzer chosen from the field name suffix, e.g. field="label_en".
+ * <p>
+ * The text after the last "_" in the field name is treated as a language tag.
+ * The analyzer is looked up in this order: {@code Util.getIndexAnalyzer(tag)},
+ * then {@code Util.getLocalizedAnalyzer(tag)}, then the default analyzer given
+ * to the constructor. Field names without "_" always use the default analyzer.
+ */
+
+public class IndexingMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
+    private static final Logger log = LoggerFactory.getLogger(IndexingMultilingualAnalyzer.class);
+
+    /** Fallback used when the field has no suffix or no analyzer is found for it. */
+    private final Analyzer defaultAnalyzer;
+
+    /**
+     * @param defaultAnalyzer the analyzer to fall back on when no
+     *        tag-specific analyzer applies
+     */
+    public IndexingMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+        super(PER_FIELD_REUSE_STRATEGY);
+        this.defaultAnalyzer = defaultAnalyzer;
+    }
+
+    /**
+     * Picks the analyzer for a field from the suffix after its last "_".
+     *
+     * @param fieldName the name of the field being analyzed
+     * @return the index analyzer for the suffix, else the localized analyzer
+     *         for the suffix, else the default analyzer
+     */
+    @Override
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+        int idx = fieldName.lastIndexOf("_");
+        if (idx == -1) { // not language-specific, e.g. "label"
+            return defaultAnalyzer;
+        }
+        String lang = fieldName.substring(idx + 1);
+        // an explicitly configured index analyzer wins over the localized one
+        Analyzer analyzer = Util.getIndexAnalyzer(lang);
+        if (analyzer == null) {
+            analyzer = Util.getLocalizedAnalyzer(lang);
+        }
+        if (analyzer == null) {
+            analyzer = defaultAnalyzer;
+        }
+        log.trace("getWrappedAnalyzer fieldName: {}, analyzer: {}", fieldName, analyzer);
+        return analyzer;
+    }
+
+    @Override
+    public String toString() {
+        return "IndexingMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
index 1ba21d1..f3fb451 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
@@ -20,6 +20,8 @@ package org.apache.jena.query.text.analyzer ;
import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
@@ -29,6 +31,7 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
*/
public class MultilingualAnalyzer extends DelegatingAnalyzerWrapper {
+ private static Logger log = LoggerFactory.getLogger(MultilingualAnalyzer.class);
private Analyzer defaultAnalyzer;
public MultilingualAnalyzer(Analyzer defaultAnalyzer) {
@@ -44,7 +47,9 @@ public class MultilingualAnalyzer extends DelegatingAnalyzerWrapper {
}
String lang = fieldName.substring(idx+1);
Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
- return (analyzer != null ? analyzer : defaultAnalyzer);
+ analyzer = analyzer != null ? analyzer : defaultAnalyzer;
+ log.trace("getWrappedAnalyzer {}", analyzer);
+ return analyzer;
}
@Override
http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
new file mode 100644
index 0000000..de35e9e
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer ;
+
+import org.apache.lucene.analysis.Analyzer ;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Lucene Analyzer used at query time that delegates to a language-specific
+ * Analyzer for fields whose name carries a suffix, e.g. field="label_en".
+ * <p>
+ * When a language tag is supplied at construction, that tag selects the
+ * analyzer for every suffixed field regardless of the suffix itself. When no
+ * tag is supplied, the suffix after the last "_" selects the analyzer, as
+ * with the MultilingualAnalyzer. Field names without "_" always use the
+ * default analyzer.
+ */
+
+public class QueryMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
+    private static final Logger log = LoggerFactory.getLogger(QueryMultilingualAnalyzer.class);
+
+    /** Fallback used when the field has no suffix or no localized analyzer is found. */
+    private final Analyzer defaultAnalyzer;
+
+    /** Language tag that overrides the field name suffix; may be null. */
+    private final String langTag;
+
+    /**
+     * Creates an analyzer that selects by field name suffix only.
+     *
+     * @param defaultAnalyzer the analyzer to fall back on
+     */
+    public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+        this(defaultAnalyzer, null);
+    }
+
+    /**
+     * Creates an analyzer that selects by the given language tag.
+     *
+     * @param defaultAnalyzer the analyzer to fall back on
+     * @param tag the language tag whose analyzer is used for all suffixed
+     *        fields, or null to select by field name suffix
+     */
+    public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer, String tag) {
+        super(PER_FIELD_REUSE_STRATEGY);
+        this.defaultAnalyzer = defaultAnalyzer;
+        this.langTag = tag;
+    }
+
+    /**
+     * The analyzer corresponding to the langTag supplied at instantiation
+     * is used to retrieve the analyzer to use regardless of the tag on the
+     * fieldName. If no langTag is supplied then the tag on fieldName is
+     * used to retrieve the analyzer as with the MultilingualAnalyzer.
+     *
+     * @param fieldName the name of the field being analyzed
+     * @return the analyzer to use in the search
+     */
+    @Override
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+        int idx = fieldName.lastIndexOf("_");
+        if (idx == -1) { // not language-specific, e.g. "label"
+            return defaultAnalyzer;
+        }
+        String lang = langTag != null ? langTag : fieldName.substring(idx + 1);
+        Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
+        if (analyzer == null) {
+            analyzer = defaultAnalyzer;
+        }
+        log.trace("getWrappedAnalyzer langTag: {}, fieldName: {}, analyzer: {}", langTag, fieldName, analyzer);
+        return analyzer;
+    }
+
+    @Override
+    public String toString() {
+        return "QueryMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 6ad0747..b41baa1 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -18,18 +18,30 @@
package org.apache.jena.query.text.analyzer;
+import org.apache.commons.lang3.StringUtils;
import org.apache.jena.rdf.model.Resource;
import org.apache.lucene.analysis.Analyzer;
import java.lang.reflect.Constructor;
+import java.util.ArrayList;
import java.util.Hashtable;
+import java.util.List;
public class Util {
- private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
- private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+ private static Hashtable<String, Class<?>> analyzersClasses; //mapping between BCP-47 language tags and lucene analyzersClasses
+ private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multiple analyzer instantiations
// cache of defined text:defineAnalyzers
private static Hashtable<String, Analyzer> definedAnalyzers = new Hashtable<>();
+
+ // analyzers registered via text:indexAnalyzer, keyed by language tag
+ private static Hashtable<String, Analyzer> indexAnalyzers = new Hashtable<>();
+
+ // cache of text:searchFor language tags
+ private static Hashtable<String, List<String>> searchForTags = new Hashtable<>();
+
+ // map from a language tag to the tags of the auxiliary indexes (text:auxIndex) defined for it
+ private static Hashtable<String, List<String>> auxIndexes = new Hashtable<>();
static {
initAnalyzerDefs();
@@ -67,6 +79,41 @@ public class Util {
public static void defineAnalyzer(Resource key, Analyzer analyzer) {
definedAnalyzers.put(key.getURI(), analyzer);
}
+
+ public static Analyzer getIndexAnalyzer(String tag) {
+ return indexAnalyzers.get(tag);
+ }
+
+    /**
+     * Registers the analyzer to use when indexing fields for the given tag.
+     *
+     * @param tag the language tag the analyzer applies to; must not be null
+     * @param analyzer the analyzer to register; must not be null
+     * @throws NullPointerException if either argument is null
+     */
+    public static void addIndexAnalyzer(String tag, Analyzer analyzer) {
+        // Hashtable rejects null keys and values with a message-less NPE;
+        // fail with the same exception type but say what was missing.
+        java.util.Objects.requireNonNull(tag, "index analyzer language tag must not be null");
+        java.util.Objects.requireNonNull(analyzer, "index analyzer for tag '" + tag + "' must not be null");
+        indexAnalyzers.put(tag, analyzer);
+    }
+
+    /**
+     * Reports whether any index analyzer has been registered.
+     *
+     * @return true if at least one analyzer was added via addIndexAnalyzer
+     */
+    public static boolean usingIndexAnalyzers() {
+        return indexAnalyzers.size() > 0;
+    }
+
+    /**
+     * Returns the searchFor tags registered for a language tag.
+     *
+     * @param tag the language tag of the query; may be null or empty
+     * @return the list registered for the tag, or a new empty list when the
+     *         tag is null, empty or has nothing registered
+     */
+    public static List<String> getSearchForTags(String tag) {
+        if (StringUtils.isEmpty(tag)) {
+            return new ArrayList<>();
+        }
+        List<String> registered = searchForTags.get(tag);
+        return registered != null ? registered : new ArrayList<>();
+    }
+
+    /**
+     * Registers the list of tags to search when a query uses the given tag.
+     *
+     * @param tag the language tag the list is registered under
+     * @param tags the tags to search for
+     */
+    public static void addSearchForTags(String tag, List<String> tags) {
+        searchForTags.put(tag, tags);
+    }
+
+    /**
+     * Returns the auxiliary index tags registered for a language tag.
+     *
+     * @param tag the language tag to look up; may be null or empty
+     * @return the list registered for the tag, or a new empty list when the
+     *         tag is null, empty or has nothing registered; never null
+     */
+    public static List<String> getAuxIndexes(String tag) {
+        // An unregistered tag used to yield null while an empty tag yielded
+        // an empty list; always return a list so callers need no null check.
+        List<String> registered = StringUtils.isNotEmpty(tag) ? auxIndexes.get(tag) : null;
+        return registered != null ? registered : new ArrayList<>();
+    }
+
+    /**
+     * Registers the auxiliary index tags for the given language tag.
+     *
+     * @param tag the language tag the list is registered under
+     * @param tags the auxiliary index tags
+     */
+    public static void addAuxIndexes(String tag, List<String> tags) {
+        auxIndexes.put(tag, tags);
+    }
private static void initAnalyzerDefs() {
analyzersClasses = new Hashtable<>();
http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
index 6326128..6977f13 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -18,6 +18,9 @@
package org.apache.jena.query.text.assembler;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.jena.assembler.Assembler;
import org.apache.jena.query.text.TextIndexException;
import org.apache.jena.query.text.analyzer.Util;
@@ -26,6 +29,8 @@ import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.vocabulary.RDF;
import org.apache.lucene.analysis.Analyzer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DefineAnalyzersAssembler {
/*
@@ -39,7 +44,46 @@ public class DefineAnalyzersAssembler {
text:analyzer [ . . . ]]
)
*/
+ private static Logger log = LoggerFactory.getLogger(DefineAnalyzersAssembler.class) ;
+
+    /**
+     * Reads the object of a statement as an RDF list of literals and returns
+     * the lexical form of each member, in list order.
+     *
+     * @param stmt the statement whose object is the head of the list
+     * @param p the property name, used only to prefix error messages
+     * @return the lexical forms of the list members; empty for rdf:nil
+     * @throws TextIndexException if the object is not a resource, a list cell
+     *         lacks rdf:first or rdf:rest, a member is not a literal, or a
+     *         rest node is not a resource
+     */
+    private static List<String> getStringList(Statement stmt, String p) {
+        List<String> tags = new ArrayList<>();
+        RDFNode aNode = stmt.getObject();
+        if (! aNode.isResource()) {
+            throw new TextIndexException(p + " property is not a list : " + aNode);
+        }
+
+        Resource current = (Resource) aNode;
+        while (current != null && ! current.equals(RDF.nil)) {
+            Statement firstStmt = current.getProperty(RDF.first);
+            if (firstStmt == null) {
+                throw new TextIndexException(p + " list not well formed: " + current);
+            }
+
+            RDFNode first = firstStmt.getObject();
+            if (! first.isLiteral()) {
+                throw new TextIndexException(p + " list not a String : " + first);
+            }
+
+            // Take the lexical form rather than toString(), which can append
+            // a language tag or datatype to the value.
+            tags.add(first.asLiteral().getLexicalForm());
+
+            Statement restStmt = current.getProperty(RDF.rest);
+            if (restStmt == null) {
+                throw new TextIndexException(p + " list not terminated by rdf:nil");
+            }
+
+            RDFNode rest = restStmt.getObject();
+            if (! rest.isResource()) {
+                throw new TextIndexException(p + " list rest node is not a resource : " + rest);
+            }
+
+            current = (Resource) rest;
+        }
+
+        return tags;
+    }
+
public static boolean open(Assembler a, Resource list) {
Resource current = list;
boolean isMultilingualSupport = false;
@@ -67,13 +111,6 @@ public class DefineAnalyzersAssembler {
// calls GenericAnalyzerAssembler
Analyzer analyzer = (Analyzer) a.open((Resource) analyzerNode);
- if (adding.hasProperty(TextVocab.pAddLang)) {
- Statement langStmt = adding.getProperty(TextVocab.pAddLang);
- String langCode = langStmt.getString();
- Util.addAnalyzer(langCode, analyzer);
- isMultilingualSupport = true;
- }
-
if (adding.hasProperty(TextVocab.pDefAnalyzer)) {
Statement defStmt = adding.getProperty(TextVocab.pDefAnalyzer);
Resource id = defStmt.getResource();
@@ -84,6 +121,37 @@ public class DefineAnalyzersAssembler {
throw new TextIndexException("addAnalyzers text:defineAnalyzer property must be a non-blank resource: " + adding);
}
}
+
+ String langCode = null;
+
+ if (adding.hasProperty(TextVocab.pAddLang)) {
+ Statement langStmt = adding.getProperty(TextVocab.pAddLang);
+ langCode = langStmt.getString();
+ Util.addAnalyzer(langCode, analyzer);
+ isMultilingualSupport = true;
+ }
+
+ if (langCode != null && adding.hasProperty(TextVocab.pSearchFor)) {
+ Statement searchForStmt = adding.getProperty(TextVocab.pSearchFor);
+ List<String> tags = getStringList(searchForStmt, "text:searchFor");
+ Util.addSearchForTags(langCode, tags);
+ }
+
+ if (langCode != null && adding.hasProperty(TextVocab.pAuxIndex)) {
+ Statement searchForStmt = adding.getProperty(TextVocab.pAuxIndex);
+ List<String> tags = getStringList(searchForStmt, "text:auxIndex");
+ Util.addAuxIndexes(langCode, tags);
+ log.trace("addAuxIndexes for {} with tags: {}", langCode, tags);
+ }
+
+
+ if (adding.hasProperty(TextVocab.pIndexAnalyzer)) {
+ Statement indexStmt = adding.getProperty(TextVocab.pIndexAnalyzer);
+ Resource key = indexStmt.getResource();
+ Analyzer indexer = Util.getDefinedAnalyzer(key);
+ Util.addIndexAnalyzer(langCode, indexer);
+ log.trace("addIndexAnalyzer lang: {} with analyzer: {}", langCode, indexer);
+ }
}
Statement restStmt = current.getProperty(RDF.rest);
http://git-wip-us.apache.org/repos/asf/jena/blob/0d07ca90/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 187715a4..973a3a4 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -108,6 +108,10 @@ public class TextVocab
public static final Property pDefTokenizer = Vocab.property(NS, "defineTokenizer");
public static final Property pAddLang = Vocab.property(NS, "addLang");
public static final Property pUseAnalyzer = Vocab.property(NS, "useAnalyzer");
+ public static final Property pSearchFor = Vocab.property(NS, "searchFor");
+ public static final Property pAuxIndex = Vocab.property(NS, "auxIndex");
+ public static final Property pIndexAnalyzer = Vocab.property(NS, "indexAnalyzer");
+ public static final Property indexAnalyzer = Vocab.property(NS, "IndexAnalyzer");
// Query Cache
public static final Property pCacheQueries = Vocab.property(NS, "cacheQueries");
[5/6] jena git commit: added auxIndex unit test
Posted by co...@apache.org.
added auxIndex unit test
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/28f94076
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/28f94076
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/28f94076
Branch: refs/heads/master
Commit: 28f9407600b6c6fe8363c23015ca141021404578
Parents: e4c2b91
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Tue Jun 19 09:25:06 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Tue Jun 19 09:25:06 2018 -0500
----------------------------------------------------------------------
.../text/TestTextMultilingualEnhancements.java | 88 +++++++++++++++++++-
1 file changed, 84 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/28f94076/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
index c9e7f05..8170408 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
@@ -85,7 +85,15 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
" text:directory \"mem\" ;",
" text:storeValues true ;",
" text:entityMap :entMap ;",
+ " text:multilingualSupport true ;",
" text:defineAnalyzers (",
+ " [ text:defineAnalyzer :folding ;",
+ " text:analyzer [",
+ " a text:ConfigurableAnalyzer ;",
+ " text:tokenizer text:StandardTokenizer ;",
+ " text:filters (text:LowerCaseFilter text:ASCIIFoldingFilter) ;",
+ " ]",
+ " ]",
" [ text:addLang \"en-01\" ;",
" text:searchFor ( \"en-01\" \"en-02\" ) ;",
" text:analyzer [ a text:StandardAnalyzer ]",
@@ -100,6 +108,17 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
" [ text:addLang \"en-04\" ;",
" text:analyzer [ a text:StandardAnalyzer ]",
" ]",
+ " [ text:addLang \"en-05\" ;",
+ " text:searchFor ( \"en-05\" \"en-aux\" ) ;",
+ " text:analyzer [ a text:StandardAnalyzer ]",
+ " ]",
+ " [ text:addLang \"en-aux\" ;",
+ " text:searchFor ( \"en-05\" \"en-aux\" ) ;",
+ " text:analyzer [ ",
+ " a text:DefinedAnalyzer ; ",
+ " text:useAnalyzer :folding",
+ " ]",
+ " ]",
" ) ;",
" .",
"",
@@ -160,6 +179,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
assertNotNull(literal);
literals.put(entityUri, literal);
}
+ System.err.println("Query: " + queryString + ", COUNT: " + count + ", Expected; " + expectedEntityURIs.size());
assertEquals(expectedEntityURIs.size(), count);
}
finally {
@@ -169,7 +189,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
}
@Test
- public void testTextMultilingualFeatures1() {
+ public void testTextSearchFor1() {
final String turtleA = StrUtils.strjoinNL(
TURTLE_PROLOG,
"<" + RESOURCE_BASE + "testResultOneInModelA>",
@@ -184,7 +204,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
QUERY_PROLOG,
"SELECT ?s ?lit",
"WHERE {",
- " (?s ?sc ?lit ?g) text:query ( \"green\" ) . ",
+ " (?s ?sc ?lit ?g) text:query ( \"green\"@en-02 ) . ",
"}"
);
Set<String> expectedURIs = new HashSet<>() ;
@@ -200,7 +220,7 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
}
@Test
- public void testTextMultilingualFeatures2() {
+ public void testTextSearchFor2() {
final String turtleA = StrUtils.strjoinNL(
TURTLE_PROLOG,
"<" + RESOURCE_BASE + "testResultOneInModelA>",
@@ -215,7 +235,36 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
QUERY_PROLOG,
"SELECT ?s ?lit",
"WHERE {",
- " (?s ?sc ?lit ?g) text:query ( \"flower\" ) . ",
+ " (?s ?sc ?lit ?g) text:query ( \"flower\"@en-01 ) . ",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA")) ;
+
+ Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+ assertEquals(1, literals.size());
+
+ Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+ assertNotNull(value);
+ }
+
+ @Test
+ public void testTextSimple1() {
+ final String turtleA = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testResultOneInModelA>",
+ " rdfs:label \"one green flower\"@en-03",
+ ".",
+ "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+ " rdfs:label \"two green flowers\"@en-04",
+ "."
+ );
+ putTurtleInModel(turtleA, "http://example.org/modelA") ;
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?lit",
+ "WHERE {",
+ " (?s ?sc ?lit ?g) text:query ( \"green\"@en-03 ) . ",
"}"
);
Set<String> expectedURIs = new HashSet<>() ;
@@ -227,4 +276,35 @@ public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTex
Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
assertNotNull(value);
}
+
+ @Test
+ public void testTextAux1() {
+ final String turtleA = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testResultOneInModelA>",
+ " rdfs:label \"one Green flower\"@en-05",
+ ".",
+ "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+ " rdfs:label \"two gReeN flowers\"@en-05",
+ "."
+ );
+ putTurtleInModel(turtleA, "http://example.org/modelA") ;
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?lit",
+ "WHERE {",
+ " (?s ?sc ?lit ?g) text:query ( \"green\"@en-aux ) . ",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA", RESOURCE_BASE + "testResultTwoInModelA")) ;
+
+ Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+ assertEquals(2, literals.size());
+
+ Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+ assertNotNull(value);
+ value = literals.get(RESOURCE_BASE + "testResultTwoInModelA");
+ assertNotNull(value);
+ }
}
[4/6] jena git commit: added searchFor unit tests
Posted by co...@apache.org.
added searchFor unit tests
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/e4c2b918
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/e4c2b918
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/e4c2b918
Branch: refs/heads/master
Commit: e4c2b918c5db86de8c0176d5f5725d570e96a946
Parents: fee0151
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Fri Jun 15 14:49:18 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Fri Jun 15 14:49:18 2018 -0500
----------------------------------------------------------------------
.../org/apache/jena/query/text/TS_Text.java | 1 +
.../text/TestTextMultilingualEnhancements.java | 230 +++++++++++++++++++
2 files changed, 231 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/e4c2b918/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 362a578..c1f7037 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -59,6 +59,7 @@ import org.junit.runners.Suite.SuiteClasses;
, TestTextGraphIndexExtra2.class
, TestTextHighlighting.class
, TestTextDefineAnalyzers.class
+ , TestTextMultilingualEnhancements.class
})
public class TS_Text
http://git-wip-us.apache.org/repos/asf/jena/blob/e4c2b918/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
new file mode 100644
index 0000000..c9e7f05
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextMultilingualEnhancements.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.Reader ;
+import java.io.StringReader ;
+import java.util.Arrays ;
+import java.util.HashMap ;
+import java.util.HashSet ;
+import java.util.Map ;
+import java.util.Set ;
+
+import org.apache.jena.assembler.Assembler ;
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.query.Dataset ;
+import org.apache.jena.query.Query ;
+import org.apache.jena.query.QueryExecution ;
+import org.apache.jena.query.QueryExecutionFactory ;
+import org.apache.jena.query.QueryFactory ;
+import org.apache.jena.query.QuerySolution ;
+import org.apache.jena.query.ReadWrite ;
+import org.apache.jena.query.ResultSet ;
+import org.apache.jena.query.text.assembler.TextAssembler ;
+import org.apache.jena.rdf.model.Literal;
+import org.apache.jena.rdf.model.Model ;
+import org.apache.jena.rdf.model.ModelFactory ;
+import org.apache.jena.rdf.model.Resource ;
+import org.junit.After ;
+import org.junit.Before ;
+import org.junit.Test ;
+
+public class TestTextMultilingualEnhancements extends AbstractTestDatasetWithTextIndexBase {
+
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ private static final String SPEC;
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a tdb:DatasetTDB ;",
+ " tdb:location \"--mem--\" ;",
+ " tdb:unionDefaultGraph true ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory \"mem\" ;",
+ " text:storeValues true ;",
+ " text:entityMap :entMap ;",
+ " text:defineAnalyzers (",
+ " [ text:addLang \"en-01\" ;",
+ " text:searchFor ( \"en-01\" \"en-02\" ) ;",
+ " text:analyzer [ a text:StandardAnalyzer ]",
+ " ]",
+ " [ text:addLang \"en-02\" ;",
+ " text:searchFor ( \"en-01\" \"en-02\" ) ;",
+ " text:analyzer [ a text:StandardAnalyzer ]",
+ " ]",
+ " [ text:addLang \"en-03\" ;",
+ " text:analyzer [ a text:StandardAnalyzer ]",
+ " ]",
+ " [ text:addLang \"en-04\" ;",
+ " text:analyzer [ a text:StandardAnalyzer ]",
+ " ]",
+ " ) ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:langField \"lang\" ;",
+ " text:graphField \"graph\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; text:predicate rdfs:label ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ @Before
+ public void before() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+ @After
+ public void after() {
+ dataset.close();
+ }
+
+ private void putTurtleInModel(String turtle, String modelName) {
+ Model model = modelName != null ? dataset.getNamedModel(modelName) : dataset.getDefaultModel() ;
+ Reader reader = new StringReader(turtle) ;
+ dataset.begin(ReadWrite.WRITE) ;
+ try {
+ model.read(reader, "", "TURTLE") ;
+ dataset.commit() ;
+ }
+ finally {
+ dataset.end();
+ }
+ }
+
+ protected Map<String,Literal> doTestSearchWithLiterals(String queryString, Set<String> expectedEntityURIs) {
+ Map<String,Literal> literals = new HashMap<>();
+ Query query = QueryFactory.create(queryString) ;
+ dataset.begin(ReadWrite.READ);
+ try(QueryExecution qexec = QueryExecutionFactory.create(query, dataset)) {
+ ResultSet results = qexec.execSelect() ;
+ assertEquals(expectedEntityURIs.size() > 0, results.hasNext());
+ int count;
+ for (count=0; results.hasNext(); count++) {
+ QuerySolution soln = results.nextSolution();
+ String entityUri = soln.getResource("s").getURI();
+ assertTrue(expectedEntityURIs.contains(entityUri));
+ Literal literal = soln.getLiteral("lit");
+ assertNotNull(literal);
+ literals.put(entityUri, literal);
+ }
+ assertEquals(expectedEntityURIs.size(), count);
+ }
+ finally {
+ dataset.end() ;
+ }
+ return literals;
+ }
+
+ @Test
+ public void testTextMultilingualFeatures1() {
+ final String turtleA = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testResultOneInModelA>",
+ " rdfs:label \"one green flower\"@en-01",
+ ".",
+ "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+ " rdfs:label \"two green flowers\"@en-02",
+ "."
+ );
+ putTurtleInModel(turtleA, "http://example.org/modelA") ;
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?lit",
+ "WHERE {",
+ " (?s ?sc ?lit ?g) text:query ( \"green\" ) . ",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA", RESOURCE_BASE + "testResultTwoInModelA")) ;
+
+ Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+ assertEquals(2, literals.size());
+
+ Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+ assertNotNull(value);
+ value = literals.get(RESOURCE_BASE + "testResultTwoInModelA");
+ assertNotNull(value);
+ }
+
+ @Test
+ public void testTextMultilingualFeatures2() {
+ final String turtleA = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testResultOneInModelA>",
+ " rdfs:label \"one green flower\"@en-01",
+ ".",
+ "<" + RESOURCE_BASE + "testResultTwoInModelA>",
+ " rdfs:label \"two green flowers\"@en-02",
+ "."
+ );
+ putTurtleInModel(turtleA, "http://example.org/modelA") ;
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s ?lit",
+ "WHERE {",
+ " (?s ?sc ?lit ?g) text:query ( \"flower\" ) . ",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + "testResultOneInModelA")) ;
+
+ Map<String, Literal> literals = doTestSearchWithLiterals(queryString, expectedURIs) ;
+ assertEquals(1, literals.size());
+
+ Literal value = literals.get(RESOURCE_BASE + "testResultOneInModelA");
+ assertNotNull(value);
+ }
+}
[2/6] jena git commit: cleanup per comments from afs
Posted by co...@apache.org.
cleanup per comments from afs
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/83492171
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/83492171
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/83492171
Branch: refs/heads/master
Commit: 83492171b8e61e42ebde29293809d2c8a2d80d0a
Parents: 0d07ca9
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Thu Jun 14 08:42:03 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Thu Jun 14 08:42:03 2018 -0500
----------------------------------------------------------------------
.../src/main/java/org/apache/jena/query/text/TextIndexLucene.java | 1 -
.../org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java | 1 -
.../jena/query/text/analyzer/IndexingMultilingualAnalyzer.java | 1 -
.../apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java | 1 -
.../org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java | 1 -
.../jena/query/text/analyzer/QueryMultilingualAnalyzer.java | 1 -
.../jena/query/text/assembler/DefineAnalyzersAssembler.java | 3 +--
.../main/java/org/apache/jena/query/text/assembler/TextVocab.java | 1 -
8 files changed, 1 insertion(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index cd4d63f..0e70688 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -433,7 +433,6 @@ public class TextIndexLucene implements TextIndex {
throw new TextIndexParseException(qs, ex.getMessage()) ;
}
catch (Exception ex) {
- ex.printStackTrace(); // TEMPORARY
throw new TextIndexException("query", ex) ;
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
index 8d54d2c..b5bbeea 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/ConfigurableAnalyzer.java
@@ -40,7 +40,6 @@ import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-
/**
* Lucene Analyzer implementation that can be configured with different
* Tokenizer and (optionally) TokenFilter implementations.
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
index 9f3b890..6faf82a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
/**
* Lucene Analyzer implementation that delegates to a language-specific
* Analyzer based on a field name suffix: e.g. field="label_en" will use
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
index 071569b..63852fd 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
@@ -22,7 +22,6 @@ import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.core.KeywordTokenizer ;
import org.apache.lucene.analysis.core.LowerCaseFilter ;
-
/**
* Lucene Analyzer implementation that works like KeywordAnalyzer (i.e.
* doesn't tokenize the input, keeps it as a single token), but forces text
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
index f3fb451..de67f9a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/MultilingualAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
/**
* Lucene Analyzer implementation that delegates to a language-specific
* Analyzer based on a field name suffix: e.g. field="label_en" will use
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
index de35e9e..33005cf 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
@@ -23,7 +23,6 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
/**
* Lucene Analyzer implementation that delegates to a language-specific
* Analyzer based on a field name suffix: e.g. field="label_en" will use
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
index 6977f13..c9dc9fe 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -143,8 +143,7 @@ public class DefineAnalyzersAssembler {
Util.addAuxIndexes(langCode, tags);
log.trace("addAuxIndexes for {} with tags: {}", langCode, tags);
}
-
-
+
if (adding.hasProperty(TextVocab.pIndexAnalyzer)) {
Statement indexStmt = adding.getProperty(TextVocab.pIndexAnalyzer);
Resource key = indexStmt.getResource();
http://git-wip-us.apache.org/repos/asf/jena/blob/83492171/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 973a3a4..6a2922b 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -111,7 +111,6 @@ public class TextVocab
public static final Property pSearchFor = Vocab.property(NS, "searchFor");
public static final Property pAuxIndex = Vocab.property(NS, "auxIndex");
public static final Property pIndexAnalyzer = Vocab.property(NS, "indexAnalyzer");
- public static final Property indexAnalyzer = Vocab.property(NS, "IndexAnalyzer");
// Query Cache
public static final Property pCacheQueries = Vocab.property(NS, "cacheQueries");
[6/6] jena git commit: This closes #436 - Merge branch 'Jena-1556-MutilingualEnhancements-3.8.0'
Posted by co...@apache.org.
This closes #436 - Merge branch 'Jena-1556-MutilingualEnhancements-3.8.0'
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/fe9bdefa
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/fe9bdefa
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/fe9bdefa
Branch: refs/heads/master
Commit: fe9bdefa42c84d5235555fa160a5f221e9656944
Parents: 3e999d5 28f9407
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Jun 20 15:58:27 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Jun 20 15:58:27 2018 -0500
----------------------------------------------------------------------
.../apache/jena/query/text/TextIndexLucene.java | 155 ++++++----
.../text/analyzer/ConfigurableAnalyzer.java | 1 -
.../analyzer/IndexingMultilingualAnalyzer.java | 61 ++++
.../text/analyzer/LowerCaseKeywordAnalyzer.java | 1 -
.../text/analyzer/MultilingualAnalyzer.java | 8 +-
.../analyzer/QueryMultilingualAnalyzer.java | 76 +++++
.../apache/jena/query/text/analyzer/Util.java | 51 ++-
.../assembler/DefineAnalyzersAssembler.java | 81 ++++-
.../jena/query/text/assembler/TextVocab.java | 3 +
.../org/apache/jena/query/text/TS_Text.java | 1 +
.../text/TestTextMultilingualEnhancements.java | 310 +++++++++++++++++++
11 files changed, 679 insertions(+), 69 deletions(-)
----------------------------------------------------------------------
[3/6] jena git commit: various cleanup per @kinow
Posted by co...@apache.org.
various cleanup per @kinow
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/fee01519
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/fee01519
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/fee01519
Branch: refs/heads/master
Commit: fee015197ccd58002e8af33fe7d39aff767d8a42
Parents: 8349217
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Fri Jun 15 09:07:10 2018 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Fri Jun 15 09:07:10 2018 -0500
----------------------------------------------------------------------
.../apache/jena/query/text/TextIndexLucene.java | 4 +-
.../analyzer/IndexingMultilingualAnalyzer.java | 47 ++++++------
.../analyzer/QueryMultilingualAnalyzer.java | 75 ++++++++++----------
.../apache/jena/query/text/analyzer/Util.java | 2 +-
.../assembler/DefineAnalyzersAssembler.java | 2 +-
5 files changed, 66 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 0e70688..120e83f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -102,6 +102,8 @@ public class TextIndexLucene implements TextIndex {
private final String queryParserType ;
private final FieldType ftText ;
private final boolean isMultilingual ;
+
+ private Map<String, Analyzer> multilingualQueryAnalyzers = new HashMap<>();
// The IndexWriter can't be final because we may have to recreate it if rollback() is called.
// However, it needs to be volatile in case the next write transaction is on a different thread,
@@ -568,8 +570,6 @@ public class TextIndexLucene implements TextIndex {
return results ;
}
- private Map<String, Analyzer> multilingualQueryAnalyzers = new HashMap<>();
-
private Analyzer getQueryAnalyzer(boolean usingSearchFor, String lang) {
if (usingSearchFor) {
Analyzer qa = multilingualQueryAnalyzers.get(lang);
http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
index 6faf82a..3bab0d2 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/IndexingMultilingualAnalyzer.java
@@ -18,6 +18,7 @@
package org.apache.jena.query.text.analyzer ;
+import org.apache.commons.lang3.ObjectUtils;
import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.slf4j.Logger;
@@ -30,31 +31,31 @@ import org.slf4j.LoggerFactory;
*/
public class IndexingMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
- private static Logger log = LoggerFactory.getLogger(IndexingMultilingualAnalyzer.class);
-
- private Analyzer defaultAnalyzer;
+ private static Logger log = LoggerFactory.getLogger(IndexingMultilingualAnalyzer.class);
- public IndexingMultilingualAnalyzer(Analyzer defaultAnalyzer) {
- super(PER_FIELD_REUSE_STRATEGY);
- this.defaultAnalyzer = defaultAnalyzer;
- }
+ private Analyzer defaultAnalyzer;
- @Override
- protected Analyzer getWrappedAnalyzer(String fieldName) {
- int idx = fieldName.lastIndexOf("_");
- if (idx == -1) { // not language-specific, e.g. "label"
- return defaultAnalyzer;
- }
- String lang = fieldName.substring(idx+1);
- Analyzer analyzer = Util.getIndexAnalyzer(lang);
- analyzer = analyzer != null ? analyzer : Util.getLocalizedAnalyzer(lang);
- analyzer = analyzer != null ? analyzer : defaultAnalyzer;
- log.trace("getWrappedAnalyzer fieldName: {}, analyzer: {}", fieldName, analyzer);
- return analyzer;
- }
+ public IndexingMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+ super(PER_FIELD_REUSE_STRATEGY);
+ this.defaultAnalyzer = defaultAnalyzer;
+ }
- @Override
- public String toString() {
- return "IndexingMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ int idx = fieldName.lastIndexOf("_");
+ if (idx == -1) { // not language-specific, e.g. "label"
+ return defaultAnalyzer;
}
+ String lang = fieldName.substring(idx+1);
+ Analyzer analyzer = Util.getIndexAnalyzer(lang);
+ analyzer = ObjectUtils.defaultIfNull(analyzer, Util.getLocalizedAnalyzer(lang));
+ analyzer = ObjectUtils.defaultIfNull(analyzer, defaultAnalyzer);
+ log.trace("getWrappedAnalyzer fieldName: {}, analyzer: {}", fieldName, analyzer);
+ return analyzer;
+ }
+
+ @Override
+ public String toString() {
+ return "IndexingMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+ }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
index 33005cf..de16c7a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/QueryMultilingualAnalyzer.java
@@ -18,6 +18,7 @@
package org.apache.jena.query.text.analyzer ;
+import org.apache.commons.lang3.ObjectUtils;
import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.slf4j.Logger;
@@ -30,46 +31,46 @@ import org.slf4j.LoggerFactory;
*/
public class QueryMultilingualAnalyzer extends DelegatingAnalyzerWrapper {
- private static Logger log = LoggerFactory.getLogger(QueryMultilingualAnalyzer.class);
- private Analyzer defaultAnalyzer;
- private String langTag;
+ private static Logger log = LoggerFactory.getLogger(QueryMultilingualAnalyzer.class);
+ private Analyzer defaultAnalyzer;
+ private String langTag;
- public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer) {
- super(PER_FIELD_REUSE_STRATEGY);
- this.defaultAnalyzer = defaultAnalyzer;
- this.langTag = null;
- }
+ public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer) {
+ super(PER_FIELD_REUSE_STRATEGY);
+ this.defaultAnalyzer = defaultAnalyzer;
+ this.langTag = null;
+ }
- public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer, String tag) {
- super(PER_FIELD_REUSE_STRATEGY);
- this.defaultAnalyzer = defaultAnalyzer;
- this.langTag = tag;
- }
+ public QueryMultilingualAnalyzer(Analyzer defaultAnalyzer, String tag) {
+ super(PER_FIELD_REUSE_STRATEGY);
+ this.defaultAnalyzer = defaultAnalyzer;
+ this.langTag = tag;
+ }
- @Override
- /**
- * The analyzer corresponding to the langTag supplied at instantiation
- * is used to retrieve the analyzer to use regardless of the tag on the
- * fieldName. If no langTag is supplied then the tag on fieldName is
- * used to retrieve the analyzer as with the MultilingualAnalyzer
- *
- * @param fieldName
- * @return the analyzer to use in the search
- */
- protected Analyzer getWrappedAnalyzer(String fieldName) {
- int idx = fieldName.lastIndexOf("_");
- if (idx == -1) { // not language-specific, e.g. "label"
- return defaultAnalyzer;
- }
- String lang = langTag != null ? langTag : fieldName.substring(idx+1);
- Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
- analyzer = analyzer != null ? analyzer : defaultAnalyzer;
- log.trace("getWrappedAnalyzer langTag: {}, fieldName: {}, analyzer: {}", langTag, fieldName, analyzer);
- return analyzer;
+ @Override
+ /**
+ * The analyzer corresponding to the langTag supplied at instantiation
+ * is used to retrieve the analyzer to use regardless of the tag on the
+ * fieldName. If no langTag is supplied then the tag on fieldName is
+ * used to retrieve the analyzer as with the MultilingualAnalyzer
+ *
+ * @param fieldName
+ * @return the analyzer to use in the search
+ */
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ int idx = fieldName.lastIndexOf("_");
+ if (idx == -1) { // not language-specific, e.g. "label"
+ return defaultAnalyzer;
}
+ String lang = ObjectUtils.defaultIfNull(langTag, fieldName.substring(idx+1));
+ Analyzer analyzer = Util.getLocalizedAnalyzer(lang);
+ analyzer = ObjectUtils.defaultIfNull(analyzer, defaultAnalyzer);
+ log.trace("getWrappedAnalyzer langTag: {}, fieldName: {}, analyzer: {}", langTag, fieldName, analyzer);
+ return analyzer;
+ }
- @Override
- public String toString() {
- return "QueryMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
- }
+ @Override
+ public String toString() {
+ return "QueryMultilingualAnalyzer(default=" + defaultAnalyzer + ")";
+ }
}
http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index b41baa1..1e7b85d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -108,7 +108,7 @@ public class Util {
}
public static List<String> getAuxIndexes(String tag) {
- return StringUtils.isNotEmpty(tag) ? auxIndexes.get(tag) : new ArrayList<>();
+ return StringUtils.isNotEmpty(tag) ? auxIndexes.get(tag) : null;
}
public static void addAuxIndexes(String tag, List<String> tags) {
http://git-wip-us.apache.org/repos/asf/jena/blob/fee01519/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
index c9dc9fe..876ca74 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/DefineAnalyzersAssembler.java
@@ -44,7 +44,7 @@ public class DefineAnalyzersAssembler {
text:analyzer [ . . . ]]
)
*/
- private static Logger log = LoggerFactory.getLogger(DefineAnalyzersAssembler.class) ;
+ private static Logger log = LoggerFactory.getLogger(DefineAnalyzersAssembler.class) ;
private static List<String> getStringList(Statement stmt, String p) {
List<String> tags = new ArrayList<String>();