You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/05/25 14:10:52 UTC
[1/6] jena git commit: Implementation of jena-text multilingual with
a single index
Repository: jena
Updated Branches:
refs/heads/master dc19466e7 -> 66a1eda82
Implementation of jena-text multilingual with a single index
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/9553c6b2
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/9553c6b2
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/9553c6b2
Branch: refs/heads/master
Commit: 9553c6b2c246bc9c05906096c1f56d65ba15fed8
Parents: 7ef374f
Author: Alexis Miara <al...@hotmail.com>
Authored: Wed May 13 11:23:56 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Wed May 13 11:23:56 2015 -0400
----------------------------------------------------------------------
.../java/org/apache/jena/query/text/Entity.java | 9 +-
.../org/apache/jena/query/text/LuceneUtil.java | 150 +++++++++++++
.../jena/query/text/TextDatasetFactory.java | 101 ++++++++-
.../org/apache/jena/query/text/TextIndex.java | 3 +
.../apache/jena/query/text/TextIndexLucene.java | 64 +++++-
.../query/text/TextIndexLuceneMultilingual.java | 33 +++
.../apache/jena/query/text/TextIndexSolr.java | 5 +
.../apache/jena/query/text/TextQueryFuncs.java | 9 +-
.../org/apache/jena/query/text/TextQueryPF.java | 41 +++-
.../assembler/LocalizedAnalyzerAssembler.java | 60 ++++++
.../query/text/assembler/TextAssembler.java | 2 +
.../TextIndexLuceneMultilingualAssembler.java | 87 ++++++++
.../jena/query/text/assembler/TextVocab.java | 3 +
.../org/apache/jena/query/text/TS_Text.java | 2 +
.../text/TestDatasetWithLocalizedAnalyzer.java | 147 +++++++++++++
...tDatasetWithLuceneMultilingualTextIndex.java | 216 +++++++++++++++++++
jena-text/testing/TextQuery/data.skos | 36 ++++
17 files changed, 946 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
index d770c5a..c7757a3 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
@@ -25,13 +25,18 @@ public class Entity
{
private final String id ;
private final String graph ;
+ private final String language ;
private final Map<String, Object> map = new HashMap<>() ;
public Entity(String entityId, String entityGraph) {
+ this(entityId, entityGraph, null);
+ }
+
+ public Entity(String entityId, String entityGraph, String lang) {
this.id = entityId ;
this.graph = entityGraph;
+ this.language = lang;
}
-
/** @deprecated Use {@linkplain #Entity(String, String)} */
@Deprecated
public Entity(String entityId) { this(entityId, null) ; }
@@ -40,6 +45,8 @@ public class Entity
public String getGraph() { return graph ; }
+ public String getLanguage() { return language ; }
+
public void put(String key, Object value)
{ map.put(key, value) ; }
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
new file mode 100644
index 0000000..7fafc4c
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.Version;
+import java.lang.reflect.Constructor;
+import java.util.Hashtable;
+
+public class LuceneUtil {
+
+ private static Hashtable<String, Class> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
+ private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+
+ static {
+ initAnalyzerDefs();
+ }
+
+ public static Analyzer getLocalizedAnalyzer(String lang) {
+ return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+ }
+
+ public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
+ lang = getISO2Language(lang);
+ if (lang == null)
+ return null;
+
+ if (cache.containsKey(lang))
+ return cache.get(lang);
+
+ try {
+ Class<?> className = analyzersClasses.get(lang);
+ if (className == null)
+ return null;
+ Constructor constructor = className.getConstructor(Version.class);
+ Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
+ cache.put(lang, analyzer);
+ return analyzer;
+ } catch (Exception e) {
+ e.printStackTrace();
+ return null;
+ }
+ }
+
+ public static String getISO2Language(String lang) {
+ if (lang != null) {
+ lang = lang.split("-")[0].toLowerCase();
+ if (lang.length() == 2)
+ return lang;
+ else {
+ if ("ara".equals(lang)) return "ar";
+ if ("bul".equals(lang)) return "bg";
+ if ("cat".equals(lang)) return "ca";
+ if ("ces".equals(lang)) return "cs";
+ if ("cze".equals(lang)) return "cs";
+ if ("dan".equals(lang)) return "da";
+ if ("deu".equals(lang)) return "de";
+ if ("ger".equals(lang)) return "de";
+ if ("ell".equals(lang)) return "el";
+ if ("gre".equals(lang)) return "el";
+ if ("eng".equals(lang)) return "en";
+ if ("spa".equals(lang)) return "es";
+ if ("eus".equals(lang)) return "eu";
+ if ("baq".equals(lang)) return "eu";
+ if ("fas".equals(lang)) return "fa";
+ if ("per".equals(lang)) return "fa";
+ if ("fin".equals(lang)) return "fi";
+ if ("fra".equals(lang)) return "fr";
+ if ("fre".equals(lang)) return "fr";
+ if ("gle".equals(lang)) return "ga";
+ if ("glg".equals(lang)) return "gl";
+ if ("hin".equals(lang)) return "hi";
+ if ("hun".equals(lang)) return "hu";
+ if ("hye".equals(lang)) return "hy";
+ if ("arm".equals(lang)) return "hy";
+ if ("ind".equals(lang)) return "id";
+ if ("ita".equals(lang)) return "it";
+ if ("jpn".equals(lang)) return "jp";
+ if ("kor".equals(lang)) return "ko";
+ if ("lav".equals(lang)) return "lv";
+ if ("nld".equals(lang)) return "nl";
+ if ("dut".equals(lang)) return "nl";
+ if ("nor".equals(lang)) return "no";
+ if ("por".equals(lang)) return "pt";
+ if ("ron".equals(lang)) return "ro";
+ if ("rum".equals(lang)) return "ro";
+ if ("rus".equals(lang)) return "ru";
+ if ("swe".equals(lang)) return "sv";
+ if ("tha".equals(lang)) return "th";
+ if ("tur".equals(lang)) return "tr";
+ if ("zho".equals(lang)) return "zh";
+ if ("chi".equals(lang)) return "zh";
+ }
+ }
+
+ return null;
+ }
+
+ private static void initAnalyzerDefs() {
+ analyzersClasses = new Hashtable<>();
+ analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
+ analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
+ analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
+ analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
+ analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
+ analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
+ analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
+ analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
+ analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
+ analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
+ analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
+ analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
+ analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
+ analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
+ analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
+ analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
+ analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
+ analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
+ analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
+ analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
+ analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
+ analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
+ analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
+ analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
+ analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
+ analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
+ analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
+ analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
+ analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
+ analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index 2b4e7ff..dd48bfa 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -83,49 +83,132 @@ public class TextDatasetFactory
return dsgt ;
}
-
+
/**
* Create a Lucene TextIndex
- *
+ *
* @param directory The Lucene Directory for the index
* @param def The EntityDefinition that defines how entities are stored in the index
* @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
+ */
public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
{
TextIndex index = new TextIndexLucene(directory, def, queryAnalyzer) ;
+ return index ;
+ }
+
+ /**
+ * Create a Lucene TextIndex
+ *
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ */
+ public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
+ {
+ TextIndex index = new TextIndexLucene(directory, def, analyzer, queryAnalyzer) ;
return index ;
}
- /**
+ /**
+ * Create a multilingual Lucene TextIndex
+ *
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ */
+ public static TextIndex createLuceneIndexMultilingual(Directory directory, EntityDefinition def)
+ {
+ TextIndex index = new TextIndexLuceneMultilingual(directory, def) ;
+ return index ;
+ }
+
+ /**
* Create a text-indexed dataset, using Lucene
- *
+ *
* @param base the base Dataset
* @param directory The Lucene Directory for the index
* @param def The EntityDefinition that defines how entities are stored in the index
* @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
+ */
public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
{
TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
- return create(base, index, true) ;
+ return create(base, index, true) ;
}
/**
* Create a text-indexed dataset, using Lucene
*
- * @param base the base DatasetGraph
+ * @param base the base Dataset
* @param directory The Lucene Directory for the index
* @param def The EntityDefinition that defines how entities are stored in the index
+ * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
* @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
*/
+ public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
+ {
+ TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
+ return create(base, index, true) ;
+ }
+
+ /**
+ * Create a multilingual text-indexed dataset, using Lucene
+ *
+ * @param base the base Dataset
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ */
+ public static Dataset createLuceneMultilingual(Dataset base, Directory directory, EntityDefinition def)
+ {
+ TextIndex index = createLuceneIndexMultilingual(directory, def) ;
+ return create(base, index, true) ;
+ }
+
+ /**
+ * Create a text-indexed dataset, using Lucene
+ *
+ * @param base the base DatasetGraph
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ */
public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
{
TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
+ return create(base, index, true) ;
+ }
+
+ /**
+ * Create a text-indexed dataset, using Lucene
+ *
+ * @param base the base DatasetGraph
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ */
+ public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
+ {
+ TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
return create(base, index, true) ;
}
- /** Create a Solr TextIndex */
+ /**
+ * Create a multilingual text-indexed dataset, using Lucene
+ *
+ * @param base the base DatasetGraph
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ */
+ public static DatasetGraph createLuceneMultilingual(DatasetGraph base, Directory directory, EntityDefinition def)
+ {
+ TextIndex index = createLuceneIndexMultilingual(directory, def) ;
+ return create(base, index, true) ;
+ }
+
+
+ /** Create a Solr TextIndex */
public static TextIndex createSolrIndex(SolrServer server, EntityDefinition entMap)
{
TextIndex index = new TextIndexSolr(server, entMap) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
index 69efb31..ffe92e7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
@@ -51,4 +51,7 @@ public interface TextIndex extends Closeable //, Transactional
List<Node> query(String qs) ;
EntityDefinition getDocDef() ;
+
+ //localization
+ boolean isMultilingual() ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 9107e86..004c242 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -85,12 +85,24 @@ public class TextIndexLucene implements TextIndex {
/**
* Constructs a new TextIndexLucene.
- *
+ *
* @param directory The Lucene Directory for the index
* @param def The EntityDefinition that defines how entities are stored in the index
* @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
*/
public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer queryAnalyzer) {
+ this(directory, def, null, queryAnalyzer);
+ }
+
+ /**
+ * Constructs a new TextIndexLucene.
+ *
+ * @param directory The Lucene Directory for the index
+ * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
+ * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ */
+ public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer) {
this.directory = directory ;
this.docDef = def ;
@@ -102,14 +114,15 @@ public class TextIndexLucene implements TextIndex {
analyzerPerField.put(def.getGraphField(), new KeywordAnalyzer()) ;
for (String field : def.fields()) {
- Analyzer analyzer = def.getAnalyzer(field);
- if (analyzer != null) {
- analyzerPerField.put(field, analyzer);
+ Analyzer _analyzer = def.getAnalyzer(field);
+ if (_analyzer != null) {
+ analyzerPerField.put(field, _analyzer);
}
}
- this.analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(VER), analyzerPerField) ;
- this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : analyzer ;
+ this.analyzer = new PerFieldAnalyzerWrapper(
+ (null != analyzer) ? analyzer : new StandardAnalyzer(VER), analyzerPerField) ;
+ this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : this.analyzer ;
openIndexWriter();
}
@@ -194,8 +207,15 @@ public class TextIndexLucene implements TextIndex {
log.debug("Update entity: " + entity) ;
try {
Document doc = doc(entity);
+ Analyzer analyzer = null;
+ if (isMultilingual())
+ analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
Term term = new Term(docDef.getEntityField(), entity.getId());
- indexWriter.updateDocument(term, doc);
+
+ if (analyzer != null)
+ indexWriter.updateDocument(term, doc, analyzer) ;
+ else //use the default one
+ indexWriter.updateDocument(term, doc);
} catch (IOException e) {
throw new TextIndexException(e) ;
}
@@ -207,7 +227,14 @@ public class TextIndexLucene implements TextIndex {
log.debug("Add entity: " + entity) ;
try {
Document doc = doc(entity) ;
- indexWriter.addDocument(doc) ;
+ Analyzer analyzer = null;
+ if (isMultilingual())
+ analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+
+ if (analyzer != null)
+ indexWriter.addDocument(doc, analyzer) ;
+ else //use the default one
+ indexWriter.addDocument(doc) ;
}
catch (IOException e) {
throw new TextIndexException(e) ;
@@ -228,6 +255,13 @@ public class TextIndexLucene implements TextIndex {
for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
Field field = new Field(e.getKey(), (String)e.getValue(), ftText) ;
doc.add(field) ;
+ if (isMultilingual()) {
+ String lang = entity.getLanguage();
+ if (lang == null || "".equals(lang))
+ lang = "undef";
+ field = new Field("lang", lang, StringField.TYPE_STORED ) ;
+ doc.add(field) ;
+ }
}
return doc ;
}
@@ -305,7 +339,14 @@ public class TextIndexLucene implements TextIndex {
private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
- Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+ Analyzer qAnalyzer = queryAnalyzer;
+ if (isMultilingual()) {//index and query analyzer must be the same
+ String lang = qs.substring( qs.lastIndexOf(":") + 1);
+ if (!"undef".equals(lang))
+ qAnalyzer = LuceneUtil.getLocalizedAnalyzer(lang);
+ }
+
+ Query query = parseQuery(qs, docDef.getPrimaryField(), qAnalyzer) ;
if ( limit <= 0 )
limit = MAX_N ;
ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
@@ -329,6 +370,11 @@ public class TextIndexLucene implements TextIndex {
return docDef ;
}
+ @Override
+ public boolean isMultilingual() {
+ return false;
+ }
+
private Node entryToNode(String v) {
// TEMP
return NodeFactoryExtra.createLiteralNode(v, null, null) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
new file mode 100644
index 0000000..86b34e6
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.lucene.store.Directory;
+
+/** TextIndexLucene variant whose isMultilingual() answers true; it adds no other behaviour of its own. */
+public class TextIndexLuceneMultilingual extends TextIndexLucene {
+
+    @Override
+    public boolean isMultilingual() { return true; }
+
+    /** Builds the index over directory with entity definition def, passing null as the query analyzer. */
+    public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
+        super(directory, def, null) ;
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
index 54a3263..5be898c 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
@@ -231,6 +231,11 @@ public class TextIndexSolr implements TextIndex
return docDef ;
}
+ @Override
+ public boolean isMultilingual() {
+ return false;
+ }
+
private Node entryToNode(String v)
{
// TEMP
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
index 512297e..d628c4a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
@@ -46,6 +46,12 @@ public class TextQueryFuncs {
return nodeToString(g) ;
}
+ /** Returns the language tag reported by a literal node, or null when o is null or is not a literal
+  *  (the node is only asked for its literal after isLiteral() has confirmed it is one). */
+ public static String getLiteralLanguage(Node o) {
+     return (o != null && o.isLiteral()) ? o.getLiteral().language() : null;
+ }
+
private static String nodeToString(Node n) {
return (n.isURI() ) ? n.getURI() : "_:" + n.getBlankNodeLabel() ;
}
@@ -77,7 +83,8 @@ public class TextQueryFuncs {
String x = TextQueryFuncs.subjectToString(s) ;
String graphText = TextQueryFuncs.graphNodeToString(g) ;
- Entity entity = new Entity(x, graphText) ;
+ String language = TextQueryFuncs.getLiteralLanguage(o) ;
+ Entity entity = new Entity(x, graphText, language) ;
String graphField = defn.getGraphField() ;
if ( defn.getGraphField() != null )
entity.put(graphField, graphText) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index 140ef09..cc7e4f6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -18,6 +18,7 @@
package org.apache.jena.query.text ;
+import java.util.Iterator;
import java.util.List ;
import org.apache.jena.atlas.iterator.Iter ;
@@ -57,6 +58,8 @@ public class TextQueryPF extends PropertyFunctionBase {
public TextQueryPF() {}
+ private String langArg = null;
+
@Override
public void build(PropFuncArg argSubject, Node predicate, PropFuncArg argObject, ExecutionContext execCxt) {
super.build(argSubject, predicate, argObject, execCxt) ;
@@ -69,6 +72,14 @@ public class TextQueryPF extends PropertyFunctionBase {
throw new QueryBuildException("Subject is not a single node: " + argSubject) ;
if (argObject.isList()) {
+ //extract of extra lang arg if present and if is usable (multilingual index).
+ //arg is removed from the list to avoid conflict with order and args length
+ if (server.isMultilingual()) {
+ langArg = extractArg("lang", argObject);
+ if (langArg == null)
+ langArg = "undef";
+ }
+
List<Node> list = argObject.getArgList() ;
if (list.size() == 0)
throw new QueryBuildException("Zero-length argument list") ;
@@ -100,6 +111,26 @@ public class TextQueryPF extends PropertyFunctionBase {
return null ;
}
+ /**
+  * Finds the first literal argument whose lexical form is "prefix:value", removes it from the
+  * argument list and returns value; returns null, leaving the list untouched, if there is none.
+  */
+ private String extractArg(String prefix, PropFuncArg argObject) {
+     List<Node> args = argObject.getArgList();
+     for (int pos = 0; pos < args.size(); pos++) {
+         Node node = args.get(pos);
+         if (!node.isLiteral())
+             continue;
+         String[] parts = node.getLiteralLexicalForm().split(":");
+         //a bare "prefix:" splits to one element; skip it instead of indexing past the array
+         if (parts.length > 1 && prefix.equals(parts[0])) {
+             args.remove(pos);
+             return parts[1];
+         }
+     }
+     return null;
+ }
+
@Override
public QueryIterator exec(Binding binding, PropFuncArg argSubject, Node predicate, PropFuncArg argObject,
ExecutionContext execCxt) {
@@ -179,8 +210,14 @@ public class TextQueryPF extends PropertyFunctionBase {
String qs2 = server.getDocDef().getGraphField() + ":" + escaped ;
queryString = "(" + queryString + ") AND " + qs2 ;
}
- }
-
+ }
+
+ //for multilingual index
+ if (langArg != null) {
+ String qs2 = "lang:" + langArg;
+ queryString = "(" + queryString + ") AND " + qs2 ;
+ }
+
Explain.explain(execCxt.getContext(), "Text query: "+queryString) ;
if ( log.isDebugEnabled())
log.debug("Text query: {} ({})", queryString,limit) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
new file mode 100644
index 0000000..1e37c15
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.query.text.LuceneUtil;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+/**
+ * Assembler to create localized analyzer.
+ */
+public class LocalizedAnalyzerAssembler extends AssemblerBase {
+    /*
+    text:map (
+         [ text:field "text" ;
+           text:predicate rdfs:label ;
+           text:analyzer [
+               a lucene:LocalizedAnalyzer ;
+               text:language "en"
+           ] ]
+    ) .
+    */
+
+    /** Builds the analyzer named by the text:language literal on root, or a StandardAnalyzer when root has no such
+     *  property. Throws TextIndexException for a non-literal value; null if LuceneUtil has no analyzer for the language. */
+    @Override
+    public Analyzer open(Assembler a, Resource root, Mode mode) {
+        if (! root.hasProperty(TextVocab.pLanguage))
+            return new StandardAnalyzer(TextIndexLucene.VER);
+        RDFNode node = root.getProperty(TextVocab.pLanguage).getObject();
+        if (! node.isLiteral())
+            throw new TextIndexException("text:language property must be a string : " + node);
+        //lexical form only: toString() on a literal can also carry a language tag or datatype
+        String lang = node.asLiteral().getLexicalForm();
+        return LuceneUtil.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index d901bc7..790dac7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -30,10 +30,12 @@ public class TextAssembler
Assembler.general.implementWith(TextVocab.entityMap, new EntityDefinitionAssembler()) ;
Assembler.general.implementWith(TextVocab.textIndexSolr, new TextIndexSolrAssembler()) ;
Assembler.general.implementWith(TextVocab.textIndexLucene, new TextIndexLuceneAssembler()) ;
+ Assembler.general.implementWith(TextVocab.textIndexLuceneMultilingual, new TextIndexLuceneMultilingualAssembler()) ;
Assembler.general.implementWith(TextVocab.standardAnalyzer, new StandardAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.simpleAnalyzer, new SimpleAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.keywordAnalyzer, new KeywordAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ;
+ Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
new file mode 100644
index 0000000..a36fcbe
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.atlas.io.IO;
+import org.apache.jena.atlas.lib.IRILib;
+import org.apache.jena.query.text.EntityDefinition;
+import org.apache.jena.query.text.TextDatasetFactory;
+import org.apache.jena.query.text.TextIndex;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.sparql.util.graph.GraphUtils;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+
+import java.io.File;
+import java.io.IOException;
+
+import static org.apache.jena.query.text.assembler.TextVocab.pDirectory;
+import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap;
+
+/**
+ * Assembler for a multilingual Lucene text index.
+ * <p>
+ * Reads the index location from <code>text:directory</code> and the entity definition from
+ * <code>text:entityMap</code>, then delegates index creation to
+ * {@code TextDatasetFactory.createLuceneIndexMultilingual}.
+ */
+public class TextIndexLuceneMultilingualAssembler extends AssemblerBase {
+    /*
+    <#index> a :TextIndexLuceneMultilingual ;
+        #text:directory "mem" ;
+        #text:directory "DIR" ;
+        text:directory <file:DIR> ;
+        text:entityMap <#entMap> ;
+        .
+    */
+
+    /**
+     * Builds the text index described by {@code root}.
+     *
+     * @param a    the assembler used to build the nested entity map
+     * @param root the resource describing the index
+     * @param mode the assembler mode (not used by this implementation)
+     * @return the multilingual text index
+     * @throws TextIndexException if {@code root} does not carry exactly one
+     *         <code>text:directory</code> property
+     */
+    @SuppressWarnings("resource")
+    @Override
+    public TextIndex open(Assembler a, Resource root, Mode mode) {
+        try {
+            if ( !GraphUtils.exactlyOneProperty(root, pDirectory) )
+                throw new TextIndexException("No 'text:directory' property on " + root) ;
+
+            Directory directory = openDirectory(root.getProperty(pDirectory).getObject()) ;
+
+            Resource entityMap = GraphUtils.getResourceValue(root, pEntityMap) ;
+            EntityDefinition docDef = (EntityDefinition)a.open(entityMap) ;
+
+            return TextDatasetFactory.createLuceneIndexMultilingual(directory, docDef) ;
+        } catch (IOException e) {
+            // IO.exception presumably rethrows as an unchecked exception (confirm); the
+            // return below keeps the compiler satisfied.
+            IO.exception(e) ;
+            return null ;
+        }
+    }
+
+    /**
+     * Resolves the value of <code>text:directory</code> to a Lucene directory: the literal
+     * "mem" gives an in-memory directory, any other literal is used as a file-system path,
+     * and a resource has its URI converted to a file name.
+     */
+    private static Directory openDirectory(RDFNode location) throws IOException {
+        if ( !location.isLiteral() ) {
+            String path = IRILib.IRIToFilename(location.asResource().getURI()) ;
+            return FSDirectory.open(new File(path)) ;
+        }
+
+        String literalValue = location.asLiteral().getLexicalForm() ;
+        if ( literalValue.equals("mem") )
+            return new RAMDirectory() ;
+        return FSDirectory.open(new File(literalValue)) ;
+    }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index a835a6f..79c223e 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -36,6 +36,8 @@ public class TextVocab
public static final Resource textIndex = Vocab.resource(NS, "TextIndex") ;
public static final Resource textIndexSolr = Vocab.resource(NS, "TextIndexSolr") ;
public static final Resource textIndexLucene = Vocab.resource(NS, "TextIndexLucene") ;
+ public static final Resource textIndexLuceneMultilingual = Vocab.resource(NS, "TextIndexLuceneMultilingual") ;
+ public static final Property pLanguage = Vocab.property(NS, "language") ;
public static final Property pServer = Vocab.property(NS, "server") ; // Solr
public static final Property pDirectory = Vocab.property(NS, "directory") ; // Lucene
public static final Property pQueryAnalyzer = Vocab.property(NS, "queryAnalyzer") ;
@@ -57,6 +59,7 @@ public class TextVocab
public static final Resource simpleAnalyzer = Vocab.resource(NS, "SimpleAnalyzer");
public static final Resource keywordAnalyzer = Vocab.resource(NS, "KeywordAnalyzer");
public static final Resource lowerCaseKeywordAnalyzer = Vocab.resource(NS, "LowerCaseKeywordAnalyzer");
+ public static final Resource localizedAnalyzer = Vocab.resource(NS, "LocalizedAnalyzer");
}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 115b493..0219675 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -30,6 +30,7 @@ import org.junit.runners.Suite.SuiteClasses ;
@SuiteClasses({
TestBuildTextDataset.class
, TestDatasetWithLuceneTextIndex.class
+ , TestDatasetWithLuceneMultilingualTextIndex.class
, TestDatasetWithLuceneGraphTextIndex.class
// Embedded solr not supported
@@ -45,6 +46,7 @@ import org.junit.runners.Suite.SuiteClasses ;
, TestDatasetWithKeywordAnalyzer.class
, TestDatasetWithLowerCaseKeywordAnalyzer.class
, TestLuceneWithMultipleThreads.class
+ , TestDatasetWithLocalizedAnalyzer.class
})
public class TS_Text
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java
new file mode 100644
index 0000000..5becf5b
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.Dataset;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Resource;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * This class defines a setup configuration for a dataset that uses a localized
+ * (language-specific) analyzer with a Lucene index: the "label" field of the entity map
+ * is configured with a text:LocalizedAnalyzer for language "en".
+ */
+public class TestDatasetWithLocalizedAnalyzer extends AbstractTestDatasetWithTextIndexBase {
+ // Location of the on-disk Lucene index used by this test.
+ // NOTE(review): the directory name refers to "TestDatasetWithLuceneIndex", not to this class;
+ // it looks copy-pasted - confirm it cannot collide with another test using the same path.
+ private static final String INDEX_PATH = "target/test/TestDatasetWithLuceneIndex";
+ private static final File indexDir = new File(INDEX_PATH);
+
+ // Identification of the root resource inside the assembler specification below.
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ // Assembler specification (Turtle) of the text dataset under test.
+ private static final String SPEC;
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory <file:" + INDEX_PATH + "> ;",
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:map (",
+ // The "label" field is analyzed with the English localized analyzer.
+ " [ text:field \"label\" ; ",
+ " text:predicate rdfs:label ;",
+ " text:analyzer [ ",
+ " a text:LocalizedAnalyzer ;",
+ " text:language \"en\" ",
+ " ]",
+ " ]",
+ // NOTE(review): second mapping of field "label" to rdfs:label, this time without an
+ // analyzer; it looks redundant with the entry above - confirm whether it is intended.
+ " [ text:field \"label\" ; text:predicate rdfs:label ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ /**
+ * Parses {@link #SPEC}, registers the text assemblers, makes sure the index directory
+ * exists and assembles the dataset under test into the inherited <code>dataset</code> field.
+ */
+ public void init() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ // Left disabled: deleteOldFiles() closes 'dataset', which is only assigned at the end of
+ // this method. The index directory is removed after each test instead (see afterClass()).
+// deleteOldFiles();
+ indexDir.mkdirs();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+
+ /**
+ * Closes the dataset and removes the index directory if it exists.
+ */
+ public void deleteOldFiles() {
+ dataset.close();
+ if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+ }
+
+ // Despite its name this is a per-test hook (@Before), not a per-class one.
+ @Before
+ public void beforeClass() {
+ init();
+ }
+
+ // Despite its name this is a per-test hook (@After), not a per-class one.
+ @After
+ public void afterClass() {
+ deleteOldFiles();
+ }
+
+ /**
+ * Indexes the label 'This is my book' and searches for 'books'; the resource is expected
+ * to be found although the indexed and queried word forms differ.
+ */
+ @Test
+ public void testLocalizedAnalyzer() {
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testLocalizedAnalyzer>",
+ " rdfs:label 'This is my book'",
+ "."
+ );
+ // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'books' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testLocalizedAnalyzer")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
new file mode 100644
index 0000000..58a78f1
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.*;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.riot.RDFDataMgr;
+import org.apache.jena.riot.Lang;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Tests for a dataset whose text index is a text:TextIndexLuceneMultilingual held in memory
+ * (text:directory "mem"). The queries pass a 'lang:xx' argument to text:query to select the
+ * language in which the search is performed.
+ */
+public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestDatasetWithTextIndex {
+
+ // Identification of the root resource inside the assembler specification below.
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ // Assembler specification (Turtle) of the text dataset under test.
+ private static final String SPEC;
+
+ // Directory holding the test data file (data.skos) read by testRetrievingSKOSConcepts().
+ static final String DIR = "testing/TextQuery" ;
+
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix skos: <http://www.w3.org/2004/02/skos/core#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLuceneMultilingual rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLuceneMultilingual ;",
+ " text:directory \"mem\" ;",
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; text:predicate rdfs:label ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " [ text:field \"prefLabel\" ; text:predicate skos:prefLabel ]",
+ " ) ."
+ );
+ }
+
+ /**
+ * Parses {@link #SPEC}, registers the text assemblers and assembles a fresh dataset
+ * into the inherited <code>dataset</code> field before each test.
+ */
+ @Before
+ public void before() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+ /** Closes the dataset after each test. */
+ @After
+ public void after() {
+ dataset.close();
+ }
+
+ /** With no data loaded, a language-restricted query must return no results. */
+ @Test
+ public void testNoResultsOnFirstCreateIndex(){
+ String turtle = "";
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label \"book\" \"lang:en\" 10 ) .",
+ "}"
+ );
+ doTestSearch(turtle, queryString, new HashSet<String>());
+ }
+
+ /**
+ * The word 'gift' occurs in both an English and a German label; searching with
+ * 'lang:en' is expected to return only the English resource.
+ */
+ @Test
+ public void testRetrievingEnglishLocalizedResource(){
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testEnglishLocalizedResource>",
+ " rdfs:label 'He offered me a gift'@en",
+ ".",
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testGermanLocalizedResource>",
+ " rdfs:label 'Er schluckte gift'@de",
+ "."
+ );
+ // only the label tagged with the requested language (en) should match
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'gift' 'lang:en' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testEnglishLocalizedResource")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+
+ /**
+ * Same data as the English test; searching with 'lang:de' is expected to return
+ * only the German resource.
+ */
+ @Test
+ public void testRetrievingGermanLocalizedResource(){
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testEnglishLocalizedResource>",
+ " rdfs:label 'He offered me a gift'@en",
+ ".",
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testGermanLocalizedResource>",
+ " rdfs:label 'Er schluckte gift'@de",
+ "."
+ );
+ // only the label tagged with the requested language (de) should match
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'gift' 'lang:de' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testGermanLocalizedResource")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+
+ /**
+ * Indexes an English label containing 'engineers' and searches for 'engineering';
+ * the resource is expected to be found although the word forms differ.
+ */
+ @Test
+ public void testEnglishStemming(){
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testEnglishStemming>",
+ " rdfs:label 'I met some engineers'@en",
+ "."
+ );
+ // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'engineering' 'lang:en' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testEnglishStemming")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+
+ /**
+ * Loads the RDF/XML file data.skos into the default model and searches skos:prefLabel
+ * in French ('frites') and in German ('Kartoffelpüree'); the union is expected to
+ * contain the 'fries' and 'mashed' concepts.
+ */
+ @Test
+ public void testRetrievingSKOSConcepts() {
+ String queryString = StrUtils.strjoinNL(
+ "PREFIX text: <http://jena.apache.org/text#>",
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
+ "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>",
+ "SELECT ?s",
+ "WHERE {",
+ " { ?s text:query ( skos:prefLabel 'frites' 'lang:fr' ) }",
+ " UNION ",
+ " { ?s text:query ( skos:prefLabel 'Kartoffelpüree' 'lang:de' ) }" ,
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll(Arrays.asList("http://example.com/dishes#fries",
+ "http://example.com/dishes#mashed")) ;
+
+ // Load the test data inside a write transaction so that it gets indexed.
+ // NOTE(review): the transaction is committed but never ended, and there is no
+ // try/finally around the load - confirm this is acceptable for this dataset.
+ dataset.begin(ReadWrite.WRITE);
+ Model model = dataset.getDefaultModel();
+ RDFDataMgr.read(model, DIR + "/data.skos", Lang.RDFXML);
+ dataset.commit();
+ doTestQuery(dataset, "", queryString, expectedURIs, expectedURIs.size());
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/testing/TextQuery/data.skos
----------------------------------------------------------------------
diff --git a/jena-text/testing/TextQuery/data.skos b/jena-text/testing/TextQuery/data.skos
new file mode 100644
index 0000000..056b91d
--- /dev/null
+++ b/jena-text/testing/TextQuery/data.skos
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+ xmlns:skos="http://www.w3.org/2004/02/skos/core#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
+
+ <skos:ConceptScheme rdf:about="http://example.com/dishes">
+ <dc:title>The Example Taxonomy</dc:title>
+ <dc:description>An example taxonomy to illustrate the use of the SKOS schema.</dc:description>
+ </skos:ConceptScheme>
+
+ <skos:Concept rdf:about="http://example.com/dishes#potatoBased">
+ <skos:prefLabel xml:lang="fr">Plats à base de pomme de terre</skos:prefLabel>
+ <skos:prefLabel xml:lang="en">Potato based dishes</skos:prefLabel>
+ <skos:prefLabel xml:lang="de">Kartoffelgerichte</skos:prefLabel>
+ <skos:inScheme rdf:resource="http://example.com/dishes"/>
+ <skos:topConceptOf rdf:resource="http://example.com/dishes"/>
+ </skos:Concept>
+
+ <skos:Concept rdf:about="http://example.com/dishes#fries">
+ <skos:prefLabel xml:lang="fr">Frites</skos:prefLabel>
+ <skos:prefLabel xml:lang="en">French fries</skos:prefLabel>
+ <skos:prefLabel xml:lang="de">Französisch frites</skos:prefLabel>
+ <skos:inScheme rdf:resource="http://example.com/dishes"/>
+ <skos:broader rdf:resource="http://example.com/dishes#potatoBased"/>
+ </skos:Concept>
+
+ <skos:Concept rdf:about="http://example.com/dishes#mashed">
+ <skos:prefLabel xml:lang="fr">Purée de pomme de terre</skos:prefLabel>
+ <skos:prefLabel xml:lang="en">Mashed potatoes</skos:prefLabel>
+ <skos:prefLabel xml:lang="de">Kartoffelpüree</skos:prefLabel>
+ <skos:inScheme rdf:resource="http://example.com/dishes"/>
+ <skos:broader rdf:resource="http://example.com/dishes#potatoBased"/>
+ </skos:Concept>
+</rdf:RDF>
\ No newline at end of file
[2/6] jena git commit: Remove of multilingual aspects in
TextIndexLucene + only usage of ISO 639-1 language codes
Posted by an...@apache.org.
Remove of multilingual aspects in TextIndexLucene
+ only usage of ISO 639-1 language codes
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/7ab59ed6
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/7ab59ed6
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/7ab59ed6
Branch: refs/heads/master
Commit: 7ab59ed6d914496c9a1492376745fe9cee840f67
Parents: 9553c6b
Author: Alexis Miara <al...@hotmail.com>
Authored: Thu May 14 09:17:40 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Thu May 14 09:17:40 2015 -0400
----------------------------------------------------------------------
.../org/apache/jena/query/text/LuceneUtil.java | 55 ---------------
.../org/apache/jena/query/text/TextIndex.java | 3 -
.../apache/jena/query/text/TextIndexLucene.java | 74 ++++++++------------
.../query/text/TextIndexLuceneMultilingual.java | 47 ++++++++++++-
.../apache/jena/query/text/TextIndexSolr.java | 5 --
.../org/apache/jena/query/text/TextQueryPF.java | 8 +--
6 files changed, 78 insertions(+), 114 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
index 7fafc4c..050b6f3 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
@@ -37,7 +37,6 @@ public class LuceneUtil {
}
public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
- lang = getISO2Language(lang);
if (lang == null)
return null;
@@ -58,60 +57,6 @@ public class LuceneUtil {
}
}
- public static String getISO2Language(String lang) {
- if (lang != null) {
- lang = lang.split("-")[0].toLowerCase();
- if (lang.length() == 2)
- return lang;
- else {
- if ("ara".equals(lang)) return "ar";
- if ("bul".equals(lang)) return "bg";
- if ("cat".equals(lang)) return "ca";
- if ("ces".equals(lang)) return "cs";
- if ("cze".equals(lang)) return "cs";
- if ("dan".equals(lang)) return "da";
- if ("deu".equals(lang)) return "de";
- if ("ger".equals(lang)) return "de";
- if ("ell".equals(lang)) return "el";
- if ("gre".equals(lang)) return "el";
- if ("eng".equals(lang)) return "en";
- if ("spa".equals(lang)) return "es";
- if ("eus".equals(lang)) return "eu";
- if ("baq".equals(lang)) return "eu";
- if ("fas".equals(lang)) return "fa";
- if ("per".equals(lang)) return "fa";
- if ("fin".equals(lang)) return "fi";
- if ("fra".equals(lang)) return "fr";
- if ("fre".equals(lang)) return "fr";
- if ("gle".equals(lang)) return "ga";
- if ("glg".equals(lang)) return "gl";
- if ("hin".equals(lang)) return "hi";
- if ("hun".equals(lang)) return "hu";
- if ("hye".equals(lang)) return "hy";
- if ("arm".equals(lang)) return "hy";
- if ("ind".equals(lang)) return "id";
- if ("ita".equals(lang)) return "it";
- if ("jpn".equals(lang)) return "jp";
- if ("kor".equals(lang)) return "ko";
- if ("lav".equals(lang)) return "lv";
- if ("nld".equals(lang)) return "nl";
- if ("dut".equals(lang)) return "nl";
- if ("nor".equals(lang)) return "no";
- if ("por".equals(lang)) return "pt";
- if ("ron".equals(lang)) return "ro";
- if ("rum".equals(lang)) return "ro";
- if ("rus".equals(lang)) return "ru";
- if ("swe".equals(lang)) return "sv";
- if ("tha".equals(lang)) return "th";
- if ("tur".equals(lang)) return "tr";
- if ("zho".equals(lang)) return "zh";
- if ("chi".equals(lang)) return "zh";
- }
- }
-
- return null;
- }
-
private static void initAnalyzerDefs() {
analyzersClasses = new Hashtable<>();
analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
index ffe92e7..69efb31 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
@@ -51,7 +51,4 @@ public interface TextIndex extends Closeable //, Transactional
List<Node> query(String qs) ;
EntityDefinition getDocDef() ;
-
- //localization
- boolean isMultilingual() ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 004c242..abb9466 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -206,42 +206,36 @@ public class TextIndexLucene implements TextIndex {
if ( log.isDebugEnabled() )
log.debug("Update entity: " + entity) ;
try {
- Document doc = doc(entity);
- Analyzer analyzer = null;
- if (isMultilingual())
- analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
- Term term = new Term(docDef.getEntityField(), entity.getId());
-
- if (analyzer != null)
- indexWriter.updateDocument(term, doc, analyzer) ;
- else //use the default one
- indexWriter.updateDocument(term, doc);
+ updateDocument(entity);
} catch (IOException e) {
throw new TextIndexException(e) ;
}
}
+ protected void updateDocument(Entity entity) throws IOException {
+ Document doc = doc(entity);
+ Term term = new Term(docDef.getEntityField(), entity.getId());
+ indexWriter.updateDocument(term, doc);
+ }
+
@Override
public void addEntity(Entity entity) {
if ( log.isDebugEnabled() )
log.debug("Add entity: " + entity) ;
try {
- Document doc = doc(entity) ;
- Analyzer analyzer = null;
- if (isMultilingual())
- analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
-
- if (analyzer != null)
- indexWriter.addDocument(doc, analyzer) ;
- else //use the default one
- indexWriter.addDocument(doc) ;
+ addDocument(entity);
}
catch (IOException e) {
throw new TextIndexException(e) ;
}
}
- private Document doc(Entity entity) {
+ protected void addDocument(Entity entity) throws IOException {
+ Document doc = doc(entity) ;
+ indexWriter.addDocument(doc) ;
+ }
+
+ protected Document doc(Entity entity) {
Document doc = new Document() ;
Field entField = new Field(docDef.getEntityField(), entity.getId(), ftIRI) ;
doc.add(entField) ;
@@ -252,18 +246,18 @@ public class TextIndexLucene implements TextIndex {
doc.add(gField) ;
}
+ for ( Field field : buildContentFields(entity) )
+ doc.add(field);
+
+ return doc ;
+ }
+
+ protected List<Field> buildContentFields(Entity entity) {
+ List<Field> list = new ArrayList<>();
for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
- Field field = new Field(e.getKey(), (String)e.getValue(), ftText) ;
- doc.add(field) ;
- if (isMultilingual()) {
- String lang = entity.getLanguage();
- if (lang == null || "".equals(lang))
- lang = "undef";
- field = new Field("lang", lang, StringField.TYPE_STORED ) ;
- doc.add(field) ;
- }
+ list.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
}
- return doc ;
+ return list;
}
@Override
@@ -289,10 +283,14 @@ public class TextIndexLucene implements TextIndex {
return query ;
}
+ protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+ return parseQuery(queryString, primaryField, analyzer);
+ }
+
private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
String escaped = QueryParserBase.escape(uri) ;
String qs = docDef.getEntityField() + ":" + escaped ;
- Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+ Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs ;
List<Map<String, Node>> records = new ArrayList<Map<String, Node>>() ;
@@ -339,14 +337,7 @@ public class TextIndexLucene implements TextIndex {
private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
- Analyzer qAnalyzer = queryAnalyzer;
- if (isMultilingual()) {//index and query analyzer must be the same
- String lang = qs.substring( qs.lastIndexOf(":") + 1);
- if (!"undef".equals(lang))
- qAnalyzer = LuceneUtil.getLocalizedAnalyzer(lang);
- }
-
- Query query = parseQuery(qs, docDef.getPrimaryField(), qAnalyzer) ;
+ Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
if ( limit <= 0 )
limit = MAX_N ;
ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
@@ -370,11 +361,6 @@ public class TextIndexLucene implements TextIndex {
return docDef ;
}
- @Override
- public boolean isMultilingual() {
- return false;
- }
-
private Node entryToNode(String v) {
// TEMP
return NodeFactoryExtra.createLiteralNode(v, null, null) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index 86b34e6..cdf7876 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -18,8 +18,18 @@
package org.apache.jena.query.text;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
+import java.io.IOException;
+import java.util.List;
+
public class TextIndexLuceneMultilingual extends TextIndexLucene {
public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
@@ -27,7 +37,40 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
}
@Override
- public boolean isMultilingual() {
- return true;
+ protected void updateDocument(Entity entity) throws IOException {
+ Document doc = doc(entity);
+ Term term = new Term(getDocDef().getEntityField(), entity.getId());
+ Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ if (analyzer == null)
+ analyzer = getAnalyzer();
+ getIndexWriter().updateDocument(term, doc, analyzer) ;
+ }
+
+ @Override
+ protected void addDocument(Entity entity) throws IOException {
+ Document doc = doc(entity) ;
+ Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ if (analyzer == null)
+ analyzer = getAnalyzer();
+ getIndexWriter().addDocument(doc, analyzer) ;
+ }
+
+ @Override
+ protected List<Field> buildContentFields(Entity entity) {
+ List<Field> list = super.buildContentFields(entity);
+ String lang = entity.getLanguage();
+ if (lang == null || "".equals(lang))
+ lang = "undef";
+ list.add( new Field("lang", lang, StringField.TYPE_STORED ) );
+ return list;
+ }
+
+ @Override
+ protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+ String lang = queryString.substring( queryString.lastIndexOf(":") + 1);
+ if (!"undef".equals(lang))
+ analyzer = LuceneUtil.getLocalizedAnalyzer(lang);
+
+ return super.preParseQuery(queryString, primaryField, analyzer);
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
index 5be898c..54a3263 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
@@ -231,11 +231,6 @@ public class TextIndexSolr implements TextIndex
return docDef ;
}
- @Override
- public boolean isMultilingual() {
- return false;
- }
-
private Node entryToNode(String v)
{
// TEMP
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index cc7e4f6..4fac00b 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -74,11 +74,9 @@ public class TextQueryPF extends PropertyFunctionBase {
if (argObject.isList()) {
//extract of extra lang arg if present and if is usable (multilingual index).
//arg is removed from the list to avoid conflict with order and args length
- if (server.isMultilingual()) {
- langArg = extractArg("lang", argObject);
- if (langArg == null)
- langArg = "undef";
- }
+ langArg = extractArg("lang", argObject);
+ if (langArg == null && server instanceof TextIndexLuceneMultilingual)
+ langArg = "undef";
List<Node> list = argObject.getArgList() ;
if (list.size() == 0)
[3/6] jena git commit: langField implementation to store lang tags of
literals + refactoring growing methods of TextDatasetFactory
Posted by an...@apache.org.
langField implementation to store lang tags of literals
+ refactoring growing methods of TextDatasetFactory
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1a57c9d3
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1a57c9d3
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1a57c9d3
Branch: refs/heads/master
Commit: 1a57c9d35b9ecf17c7e65c6bf7f19951adc3e44f
Parents: 7ab59ed
Author: Alexis Miara <al...@hotmail.com>
Authored: Tue May 19 14:41:32 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Tue May 19 14:41:32 2015 -0400
----------------------------------------------------------------------
.../main/java/examples/JenaTextExample1.java | 6 +-
.../jena/query/text/EntityDefinition.java | 70 +++--------
.../org/apache/jena/query/text/LuceneUtil.java | 95 --------------
.../jena/query/text/TextDatasetFactory.java | 109 ++--------------
.../apache/jena/query/text/TextIndexConfig.java | 61 +++++++++
.../apache/jena/query/text/TextIndexLucene.java | 50 +++-----
.../query/text/TextIndexLuceneMultilingual.java | 41 +++---
.../org/apache/jena/query/text/TextQueryPF.java | 16 +--
.../apache/jena/query/text/analyzer/Util.java | 96 ++++++++++++++
.../assembler/EntityDefinitionAssembler.java | 8 +-
.../assembler/LocalizedAnalyzerAssembler.java | 4 +-
.../query/text/assembler/TextAssembler.java | 1 -
.../assembler/TextIndexLuceneAssembler.java | 40 ++++--
.../TextIndexLuceneMultilingualAssembler.java | 87 -------------
.../jena/query/text/assembler/TextVocab.java | 4 +-
...ractTestDatasetWithLuceneGraphTextIndex.java | 6 +-
.../org/apache/jena/query/text/TS_Text.java | 1 +
.../jena/query/text/TestBuildTextDataset.java | 5 +-
...tDatasetWithLuceneMultilingualTextIndex.java | 34 ++++-
...DatasetWithLuceneTextIndexWithLangField.java | 126 +++++++++++++++++++
.../text/TestLuceneWithMultipleThreads.java | 10 +-
.../org/apache/jena/query/text/TestTextTDB.java | 5 +-
22 files changed, 453 insertions(+), 422 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/examples/JenaTextExample1.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/examples/JenaTextExample1.java b/jena-text/src/main/java/examples/JenaTextExample1.java
index 6ad2c26..c273540 100644
--- a/jena-text/src/main/java/examples/JenaTextExample1.java
+++ b/jena-text/src/main/java/examples/JenaTextExample1.java
@@ -23,6 +23,7 @@ import org.apache.jena.atlas.logging.LogCtl ;
import org.apache.jena.query.* ;
import org.apache.jena.query.text.EntityDefinition ;
import org.apache.jena.query.text.TextDatasetFactory ;
+import org.apache.jena.query.text.TextIndexConfig;
import org.apache.jena.query.text.TextQuery ;
import org.apache.jena.rdf.model.Model ;
import org.apache.jena.riot.RDFDataMgr ;
@@ -58,13 +59,14 @@ public class JenaTextExample1
Dataset ds1 = DatasetFactory.createMem() ;
// Define the index mapping
- EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+ EntityDefinition entDef = new EntityDefinition("uri", "text");
+ entDef.setPrimaryPredicate(RDFS.label.asNode());
// Lucene, in memory.
Directory dir = new RAMDirectory();
// Join together into a dataset
- Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
+ Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef)) ;
return ds ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
index 2f15ffb..30b048a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
@@ -41,7 +41,8 @@ public class EntityDefinition {
// Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
private final String entityField ;
private final String primaryField ;
- private final String graphField ;
+ private String graphField ;
+ private String langField ;
//private final Node primaryPredicate ;
/**
@@ -51,67 +52,22 @@ public class EntityDefinition {
* The primary/default field to search
*/
public EntityDefinition(String entityField, String primaryField) {
- this(entityField, primaryField, (String)null) ;
- }
-
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param graphField
- * The field that stores graph URI, or null
- */
- public EntityDefinition(String entityField, String primaryField, String graphField) {
this.entityField = entityField ;
this.primaryField = primaryField ;
- this.graphField = graphField ;
}
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param primaryPredicate
- * The property associated with the primary/default field
- */
- public EntityDefinition(String entityField, String primaryField, Resource primaryPredicate) {
- this(entityField, primaryField, null, primaryPredicate.asNode()) ;
+ public String getEntityField() {
+ return entityField ;
}
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param primaryPredicate
- * The property associated with the primary/default field
- */
- public EntityDefinition(String entityField, String primaryField, Node primaryPredicate) {
- this(entityField, primaryField, null, primaryPredicate) ;
+ public void setPrimaryPredicate(Resource primaryPredicate) {
+ setPrimaryPredicate(primaryPredicate.asNode());
}
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param graphField
- * The field that stores graph URI, or null
- * @param primaryPredicate
- * The property associated with the primary/default field
- */
- public EntityDefinition(String entityField, String primaryField, String graphField, Node primaryPredicate) {
- this(entityField, primaryField, graphField) ;
+ public void setPrimaryPredicate(Node primaryPredicate) {
set(primaryField, primaryPredicate) ;
}
-
- public String getEntityField() {
- return entityField ;
- }
-
public void set(String field, Node predicate) {
predicateToField.put(predicate, field) ;
// Add uniquely.
@@ -149,6 +105,18 @@ public class EntityDefinition {
return graphField ;
}
+ public void setGraphField(String graphField) {
+ this.graphField = graphField;
+ }
+
+ public String getLangField() {
+ return langField;
+ }
+
+ public void setLangField(String langField) {
+ this.langField = langField;
+ }
+
public Collection<String> fields() {
return fields ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
deleted file mode 100644
index 050b6f3..0000000
--- a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.query.text;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.util.Version;
-import java.lang.reflect.Constructor;
-import java.util.Hashtable;
-
-public class LuceneUtil {
-
- private static Hashtable<String, Class> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
- private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
-
- static {
- initAnalyzerDefs();
- }
-
- public static Analyzer getLocalizedAnalyzer(String lang) {
- return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
- }
-
- public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
- if (lang == null)
- return null;
-
- if (cache.containsKey(lang))
- return cache.get(lang);
-
- try {
- Class<?> className = analyzersClasses.get(lang);
- if (className == null)
- return null;
- Constructor constructor = className.getConstructor(Version.class);
- Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
- cache.put(lang, analyzer);
- return analyzer;
- } catch (Exception e) {
- e.printStackTrace();
- return null;
- }
- }
-
- private static void initAnalyzerDefs() {
- analyzersClasses = new Hashtable<>();
- analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
- analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
- analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
- analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
- analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
- analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
- analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
- analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
- analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
- analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
- analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
- analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
- analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
- analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
- analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
- analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
- analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
- analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
- analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
- analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
- analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
- analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
- analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
- analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
- analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
- analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
- analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
- analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
- analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
- analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
- analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
- analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index dd48bfa..dc6a094 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -24,7 +24,6 @@ import org.apache.jena.query.text.assembler.TextVocab ;
import org.apache.jena.sparql.core.DatasetGraph ;
import org.apache.jena.sparql.core.assembler.AssemblerUtils ;
import org.apache.jena.sparql.util.Context ;
-import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.store.Directory ;
import org.apache.solr.client.solrj.SolrServer ;
@@ -88,80 +87,28 @@ public class TextDatasetFactory
* Create a Lucene TextIndex
*
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
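+ * @param config The index configuration: entity definition, optional index and query analyzers, and the multilingual-support flag that selects the index implementation.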
*/
- public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
+ public static TextIndex createLuceneIndex(Directory directory, TextIndexConfig config)
{
- TextIndex index = new TextIndexLucene(directory, def, queryAnalyzer) ;
+ TextIndex index;
+ if (config.isMultilingualSupport())
+ index = new TextIndexLuceneMultilingual(directory, config) ;
+ else
+ index = new TextIndexLucene(directory, config) ;
return index ;
}
/**
- * Create a Lucene TextIndex
- *
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
- {
- TextIndex index = new TextIndexLucene(directory, def, analyzer, queryAnalyzer) ;
- return index ;
- }
-
- /**
- * Create a multilingual Lucene TextIndex
- *
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- */
- public static TextIndex createLuceneIndexMultilingual(Directory directory, EntityDefinition def)
- {
- TextIndex index = new TextIndexLuceneMultilingual(directory, def) ;
- return index ;
- }
-
- /**
- * Create a text-indexed dataset, using Lucene
- *
- * @param base the base Dataset
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
- {
- TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
- return create(base, index, true) ;
- }
-
- /**
* Create a text-indexed dataset, using Lucene
- *
- * @param base the base Dataset
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
- {
- TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
- return create(base, index, true) ;
- }
-
- /**
- * Create a multilingual text-indexed dataset, using Lucene
*
* @param base the base Dataset
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param config The index configuration (entity definition, optional analyzers, multilingual-support flag) used to build the Lucene index.
*/
- public static Dataset createLuceneMultilingual(Dataset base, Directory directory, EntityDefinition def)
+ public static Dataset createLucene(Dataset base, Directory directory, TextIndexConfig config)
{
- TextIndex index = createLuceneIndexMultilingual(directory, def) ;
+ TextIndex index = createLuceneIndex(directory, config) ;
return create(base, index, true) ;
}
@@ -170,44 +117,14 @@ public class TextDatasetFactory
*
* @param base the base DatasetGraph
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ * @param config The index configuration (entity definition, optional analyzers, multilingual-support flag) used to build the Lucene index.
*/
- public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
+ public static DatasetGraph createLucene(DatasetGraph base, Directory directory, TextIndexConfig config)
{
- TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
+ TextIndex index = createLuceneIndex(directory, config) ;
return create(base, index, true) ;
}
- /**
- * Create a text-indexed dataset, using Lucene
- *
- * @param base the base DatasetGraph
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
- {
- TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
- return create(base, index, true) ;
- }
-
- /**
- * Create a multilingual text-indexed dataset, using Lucene
- *
- * @param base the base DatasetGraph
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- */
- public static DatasetGraph createLuceneMultilingual(DatasetGraph base, Directory directory, EntityDefinition def)
- {
- TextIndex index = createLuceneIndexMultilingual(directory, def) ;
- return create(base, index, true) ;
- }
-
-
/** Create a Solr TextIndex */
public static TextIndex createSolrIndex(SolrServer server, EntityDefinition entMap)
{
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
new file mode 100644
index 0000000..feeb324
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.lucene.analysis.Analyzer;
+
+public class TextIndexConfig {
+
+ EntityDefinition entDef;
+ Analyzer analyzer;
+ Analyzer queryAnalyzer;
+ boolean multilingualSupport;
+
+ public TextIndexConfig(EntityDefinition entDef) {
+ this.entDef = entDef;
+ }
+
+ public EntityDefinition getEntDef() {
+ return entDef;
+ }
+
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ public void setAnalyzer(Analyzer analyzer) {
+ this.analyzer = analyzer;
+ }
+
+ public Analyzer getQueryAnalyzer() {
+ return queryAnalyzer;
+ }
+
+ public void setQueryAnalyzer(Analyzer queryAnalyzer) {
+ this.queryAnalyzer = queryAnalyzer;
+ }
+
+ public boolean isMultilingualSupport() {
+ return multilingualSupport;
+ }
+
+ public void setMultilingualSupport(boolean multilingualSupport) {
+ this.multilingualSupport = multilingualSupport;
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index abb9466..cd9ea2f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -87,42 +87,29 @@ public class TextIndexLucene implements TextIndex {
* Constructs a new TextIndexLucene.
*
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ * @param config The index configuration supplying the entity definition and the optional index and query analyzers (defaults are used when an analyzer is null).
*/
- public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer queryAnalyzer) {
- this(directory, def, null, queryAnalyzer);
- }
-
- /**
- * Constructs a new TextIndexLucene.
- *
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer) {
+ public TextIndexLucene(Directory directory, TextIndexConfig config) {
this.directory = directory ;
- this.docDef = def ;
+ this.docDef = config.getEntDef() ;
// create the analyzer as a wrapper that uses KeywordAnalyzer for
// entity and graph fields and StandardAnalyzer for all other
Map<String, Analyzer> analyzerPerField = new HashMap<>() ;
- analyzerPerField.put(def.getEntityField(), new KeywordAnalyzer()) ;
- if ( def.getGraphField() != null )
- analyzerPerField.put(def.getGraphField(), new KeywordAnalyzer()) ;
+ analyzerPerField.put(docDef.getEntityField(), new KeywordAnalyzer()) ;
+ if ( docDef.getGraphField() != null )
+ analyzerPerField.put(docDef.getGraphField(), new KeywordAnalyzer()) ;
- for (String field : def.fields()) {
- Analyzer _analyzer = def.getAnalyzer(field);
+ for (String field : docDef.fields()) {
+ Analyzer _analyzer = docDef.getAnalyzer(field);
if (_analyzer != null) {
analyzerPerField.put(field, _analyzer);
}
}
this.analyzer = new PerFieldAnalyzerWrapper(
- (null != analyzer) ? analyzer : new StandardAnalyzer(VER), analyzerPerField) ;
- this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : this.analyzer ;
+ (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
+ this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
openIndexWriter();
}
@@ -246,18 +233,17 @@ public class TextIndexLucene implements TextIndex {
doc.add(gField) ;
}
- for ( Field field : buildContentFields(entity) )
- doc.add(field);
+ String langField = docDef.getLangField() ;
- return doc ;
- }
-
- protected List<Field> buildContentFields(Entity entity) {
- List<Field> list = new ArrayList<>();
for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
- list.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
+ doc.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
+ if (langField != null) {
+ String lang = entity.getLanguage();
+ if (lang != null && !"".equals(lang))
+ doc.add(new Field(docDef.getLangField(), lang, StringField.TYPE_STORED));
+ }
}
- return list;
+ return doc ;
}
@Override
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index cdf7876..ce20294 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -18,29 +18,37 @@
package org.apache.jena.query.text;
+import org.apache.jena.query.text.analyzer.Util;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import java.io.IOException;
-import java.util.List;
public class TextIndexLuceneMultilingual extends TextIndexLucene {
- public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
- super(directory, def, null) ;
+ /**
+ * Constructs a new TextIndexLuceneMultilingual.
+ *
+ * @param directory The Lucene Directory for the index
+ * @param config The index configuration; if its entity definition has no lang field, one named "lang" is set on it.
+ */
+ public TextIndexLuceneMultilingual(Directory directory, TextIndexConfig config) {
+ super(directory, config) ;
+
+ // A multilingual index needs a lang field to pick the analyzer at query time; default it to "lang" when none is configured.
+ if (config.getEntDef().getLangField() == null)
+ config.getEntDef().setLangField("lang");
}
@Override
protected void updateDocument(Entity entity) throws IOException {
Document doc = doc(entity);
Term term = new Term(getDocDef().getEntityField(), entity.getId());
- Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ Analyzer analyzer = Util.getLocalizedAnalyzer(entity.getLanguage());
if (analyzer == null)
analyzer = getAnalyzer();
getIndexWriter().updateDocument(term, doc, analyzer) ;
@@ -49,28 +57,19 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
@Override
protected void addDocument(Entity entity) throws IOException {
Document doc = doc(entity) ;
- Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ Analyzer analyzer = Util.getLocalizedAnalyzer(entity.getLanguage());
if (analyzer == null)
analyzer = getAnalyzer();
getIndexWriter().addDocument(doc, analyzer) ;
}
@Override
- protected List<Field> buildContentFields(Entity entity) {
- List<Field> list = super.buildContentFields(entity);
- String lang = entity.getLanguage();
- if (lang == null || "".equals(lang))
- lang = "undef";
- list.add( new Field("lang", lang, StringField.TYPE_STORED ) );
- return list;
- }
-
- @Override
protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
- String lang = queryString.substring( queryString.lastIndexOf(":") + 1);
- if (!"undef".equals(lang))
- analyzer = LuceneUtil.getLocalizedAnalyzer(lang);
-
+ if (queryString.contains(getDocDef().getLangField() + ":")) {
+ String lang = queryString.substring(queryString.lastIndexOf(":") + 1); //text after the last ':' is taken as the language
+ Analyzer localized = "*".equals(lang) ? null : Util.getLocalizedAnalyzer(lang);
+ if (localized != null) analyzer = localized; //no analyzer for this language: keep the one passed in instead of null
+ }
return super.preParseQuery(queryString, primaryField, analyzer);
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index 4fac00b..d568232 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -72,11 +72,9 @@ public class TextQueryPF extends PropertyFunctionBase {
throw new QueryBuildException("Subject is not a single node: " + argSubject) ;
if (argObject.isList()) {
- //extract of extra lang arg if present and if is usable (multilingual index).
+ //extract the extra lang arg if it is present and usable.
//arg is removed from the list to avoid conflict with order and args length
langArg = extractArg("lang", argObject);
- if (langArg == null && server instanceof TextIndexLuceneMultilingual)
- langArg = "undef";
List<Node> list = argObject.getArgList() ;
if (list.size() == 0)
@@ -210,10 +208,14 @@ public class TextQueryPF extends PropertyFunctionBase {
}
}
- //for multilingual index
- if (langArg != null) {
- String qs2 = "lang:" + langArg;
- queryString = "(" + queryString + ") AND " + qs2 ;
+ //for language-based search extension
+ if (server.getDocDef().getLangField() != null) {
+ String field = server.getDocDef().getLangField();
+ if (langArg != null) {
+ String qs2 = !"none".equals(langArg)?
+ field + ":" + langArg : "-" + field + ":*";
+ queryString = "(" + queryString + ") AND " + qs2;
+ }
}
Explain.explain(execCxt.getContext(), "Text query: "+queryString) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
new file mode 100644
index 0000000..11dd683
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer;
+
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.Version;
+import java.lang.reflect.Constructor;
+import java.util.Hashtable;
+
+public class Util {
+ private static final Hashtable<String, Class<? extends Analyzer>> analyzersClasses = new Hashtable<>(); //2-letter language code -> existing Lucene analyzer class
+ private static final Hashtable<String, Analyzer> cache = new Hashtable<>(); //analyzers already built, keyed by language and Lucene version
+
+ /** Returns the analyzer for {@code lang} built for {@link TextIndexLucene#VER}, or null if there is none. */
+ public static Analyzer getLocalizedAnalyzer(String lang) {
+ return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+ }
+
+ /**
+ * Returns the analyzer registered for a language, creating it on first use and caching it per language and version.
+ * @param lang 2-letter language code (may be null)
+ * @param ver Lucene version passed to the analyzer constructor
+ * @return the analyzer, or null when lang is null, has no registered analyzer, or instantiation fails
+ */
+ public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
+ if (lang == null)
+ return null;
+ Class<? extends Analyzer> analyzerClass = analyzersClasses.get(lang);
+ if (analyzerClass == null)
+ return null;
+ String key = lang + "@" + ver; //an instance is built for one version, so it must not be served for another
+ Analyzer analyzer = cache.get(key);
+ if (analyzer != null)
+ return analyzer;
+ try {
+ Constructor<? extends Analyzer> constructor = analyzerClass.getConstructor(Version.class);
+ analyzer = constructor.newInstance(ver);
+ cache.put(key, analyzer);
+ return analyzer;
+ } catch (Exception e) {
+ e.printStackTrace(); //best effort: report the failure and answer "no analyzer"
+ return null;
+ }
+ }
+ static {
+ analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
+ analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
+ analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
+ analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
+ analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
+ analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
+ analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
+ analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
+ analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
+ analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
+ analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
+ analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
+ analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
+ analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
+ analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
+ analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
+ analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
+ analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
+ analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
+ analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
+ analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
+ analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
+ analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
+ analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
+ analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
+ analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
+ analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
+ analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
+ analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
+ analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
index ca66f27..7604822 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
@@ -66,6 +66,9 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
" OPTIONAL {" ,
" ?eMap :graphField ?graphField" ,
" }",
+ " OPTIONAL {" ,
+ " ?eMap :langField ?langField" ,
+ " }",
"}") ;
ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1) ;
pss.setIri("eMap", root.getURI()) ;
@@ -87,6 +90,7 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
QuerySolution qsol1 = results.get(0) ;
String entityField = qsol1.getLiteral("entityField").getLexicalForm() ;
String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
+ String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null ;
Multimap<String, Node> mapDefs = HashMultimap.create() ;
@@ -155,7 +159,9 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
throw new TextIndexException("No definition of primary field '"+defaultField+"'") ;
}
- EntityDefinition docDef = new EntityDefinition(entityField, defaultField, graphField) ;
+ EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
+ docDef.setGraphField(graphField);
+ docDef.setLangField(langField);
for ( String f : mapDefs.keys() ) {
for ( Node p : mapDefs.get(f))
docDef.set(f, p) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
index 1e37c15..b9d83de 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
@@ -21,9 +21,9 @@ package org.apache.jena.query.text.assembler;
import org.apache.jena.assembler.Assembler;
import org.apache.jena.assembler.Mode;
import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.query.text.LuceneUtil;
import org.apache.jena.query.text.TextIndexException;
import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.query.text.analyzer.Util;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.lucene.analysis.Analyzer;
@@ -52,7 +52,7 @@ public class LocalizedAnalyzerAssembler extends AssemblerBase {
throw new TextIndexException("text:language property must be a string : " + node);
}
String lang = node.toString();
- return LuceneUtil.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+ return Util.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
} else {
return new StandardAnalyzer(TextIndexLucene.VER);
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 790dac7..021c003 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -30,7 +30,6 @@ public class TextAssembler
Assembler.general.implementWith(TextVocab.entityMap, new EntityDefinitionAssembler()) ;
Assembler.general.implementWith(TextVocab.textIndexSolr, new TextIndexSolrAssembler()) ;
Assembler.general.implementWith(TextVocab.textIndexLucene, new TextIndexLuceneAssembler()) ;
- Assembler.general.implementWith(TextVocab.textIndexLuceneMultilingual, new TextIndexLuceneMultilingualAssembler()) ;
Assembler.general.implementWith(TextVocab.standardAnalyzer, new StandardAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.simpleAnalyzer, new SimpleAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.keywordAnalyzer, new KeywordAnalyzerAssembler()) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 361841c..abc6c97 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -18,10 +18,6 @@
package org.apache.jena.query.text.assembler ;
-import static org.apache.jena.query.text.assembler.TextVocab.pDirectory ;
-import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap ;
-import static org.apache.jena.query.text.assembler.TextVocab.pQueryAnalyzer ;
-
import java.io.File ;
import java.io.IOException ;
@@ -30,10 +26,7 @@ import org.apache.jena.assembler.Mode ;
import org.apache.jena.assembler.assemblers.AssemblerBase ;
import org.apache.jena.atlas.io.IO ;
import org.apache.jena.atlas.lib.IRILib ;
-import org.apache.jena.query.text.EntityDefinition ;
-import org.apache.jena.query.text.TextDatasetFactory ;
-import org.apache.jena.query.text.TextIndex ;
-import org.apache.jena.query.text.TextIndexException ;
+import org.apache.jena.query.text.*;
import org.apache.jena.rdf.model.RDFNode ;
import org.apache.jena.rdf.model.Resource ;
import org.apache.jena.rdf.model.Statement ;
@@ -43,6 +36,8 @@ import org.apache.lucene.store.Directory ;
import org.apache.lucene.store.FSDirectory ;
import org.apache.lucene.store.RAMDirectory ;
+import static org.apache.jena.query.text.assembler.TextVocab.*;
+
public class TextIndexLuceneAssembler extends AssemblerBase {
/*
<#index> a :TextIndexLucene ;
@@ -77,7 +72,18 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
File dir = new File(path) ;
directory = FSDirectory.open(dir) ;
}
-
+
+ Analyzer analyzer = null;
+ Statement analyzerStatement = root.getProperty(pAnalyzer);
+ if (null != analyzerStatement) {
+ RDFNode aNode = analyzerStatement.getObject();
+ if (! aNode.isResource()) {
+ throw new TextIndexException("Text analyzer property is not a resource : " + aNode);
+ }
+ Resource analyzerResource = (Resource) aNode;
+ analyzer = (Analyzer) a.open(analyzerResource);
+ }
+
Analyzer queryAnalyzer = null;
Statement queryAnalyzerStatement = root.getProperty(pQueryAnalyzer);
if (null != queryAnalyzerStatement) {
@@ -89,10 +95,24 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
queryAnalyzer = (Analyzer) a.open(analyzerResource);
}
+ boolean isMultilingualSupport = false;
+ Statement mlSupportStatement = root.getProperty(pMultilingualSupport);
+ if (null != mlSupportStatement) {
+ RDFNode mlsNode = mlSupportStatement.getObject();
+ if (! mlsNode.isLiteral()) {
+ throw new TextIndexException("text:multilingualSupport property must be a string : " + mlsNode);
+ }
+ isMultilingualSupport = mlsNode.asLiteral().getBoolean();
+ }
+
Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
EntityDefinition docDef = (EntityDefinition)a.open(r) ;
+ TextIndexConfig config = new TextIndexConfig(docDef);
+ config.setAnalyzer(analyzer);
+ config.setQueryAnalyzer(queryAnalyzer);
+ config.setMultilingualSupport(isMultilingualSupport);
- return TextDatasetFactory.createLuceneIndex(directory, docDef, queryAnalyzer) ;
+ return TextDatasetFactory.createLuceneIndex(directory, config) ;
} catch (IOException e) {
IO.exception(e) ;
return null ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
deleted file mode 100644
index a36fcbe..0000000
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.query.text.assembler;
-
-import org.apache.jena.assembler.Assembler;
-import org.apache.jena.assembler.Mode;
-import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.atlas.io.IO;
-import org.apache.jena.atlas.lib.IRILib;
-import org.apache.jena.query.text.EntityDefinition;
-import org.apache.jena.query.text.TextDatasetFactory;
-import org.apache.jena.query.text.TextIndex;
-import org.apache.jena.query.text.TextIndexException;
-import org.apache.jena.rdf.model.RDFNode;
-import org.apache.jena.rdf.model.Resource;
-import org.apache.jena.sparql.util.graph.GraphUtils;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.RAMDirectory;
-
-import java.io.File;
-import java.io.IOException;
-
-import static org.apache.jena.query.text.assembler.TextVocab.pDirectory;
-import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap;
-
-public class TextIndexLuceneMultilingualAssembler extends AssemblerBase {
- /*
- <#index> a :TextIndexLuceneMultilingual ;
- #text:directory "mem" ;
- #text:directory "DIR" ;
- text:directory <file:DIR> ;
- text:entityMap <#endMap> ;
- .
- */
-
- @SuppressWarnings("resource")
- @Override
- public TextIndex open(Assembler a, Resource root, Mode mode) {
- try {
- if ( !GraphUtils.exactlyOneProperty(root, pDirectory) )
- throw new TextIndexException("No 'text:directory' property on " + root) ;
-
- Directory directory ;
-
- RDFNode n = root.getProperty(pDirectory).getObject() ;
- if ( n.isLiteral() ) {
- String literalValue = n.asLiteral().getLexicalForm() ;
- if (literalValue.equals("mem")) {
- directory = new RAMDirectory() ;
- } else {
- File dir = new File(literalValue) ;
- directory = FSDirectory.open(dir) ;
- }
- } else {
- Resource x = n.asResource() ;
- String path = IRILib.IRIToFilename(x.getURI()) ;
- File dir = new File(path) ;
- directory = FSDirectory.open(dir) ;
- }
-
- Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
- EntityDefinition docDef = (EntityDefinition)a.open(r) ;
-
- return TextDatasetFactory.createLuceneIndexMultilingual(directory, docDef) ;
- } catch (IOException e) {
- IO.exception(e) ;
- return null ;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 79c223e..802990d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -36,10 +36,10 @@ public class TextVocab
public static final Resource textIndex = Vocab.resource(NS, "TextIndex") ;
public static final Resource textIndexSolr = Vocab.resource(NS, "TextIndexSolr") ;
public static final Resource textIndexLucene = Vocab.resource(NS, "TextIndexLucene") ;
- public static final Resource textIndexLuceneMultilingual = Vocab.resource(NS, "TextIndexLuceneMultilingual") ;
public static final Property pLanguage = Vocab.property(NS, "language") ;
public static final Property pServer = Vocab.property(NS, "server") ; // Solr
public static final Property pDirectory = Vocab.property(NS, "directory") ; // Lucene
+ public static final Property pMultilingualSupport = Vocab.property(NS, "multilingualSupport") ;
public static final Property pQueryAnalyzer = Vocab.property(NS, "queryAnalyzer") ;
public static final Property pEntityMap = Vocab.property(NS, "entityMap") ;
@@ -47,6 +47,8 @@ public class TextVocab
public static final Resource entityMap = Vocab.resource(NS, "EntityMap") ;
public static final Property pEntityField = Vocab.property(NS, "entityField") ;
public static final Property pDefaultField = Vocab.property(NS, "defaultField") ;
+ public static final Property pGraphField = Vocab.property(NS, "graphField") ;
+ public static final Property pLangField = Vocab.property(NS, "langField") ;
public static final Property pMap = Vocab.property(NS, "map") ;
public static final Property pField = Vocab.property(NS, "field") ;
public static final Property pPredicate = Vocab.property(NS, "predicate") ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
index 56a81b6..1670f63 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
@@ -36,9 +36,11 @@ public class AbstractTestDatasetWithLuceneGraphTextIndex extends AbstractTestDat
public void init() {
Dataset ds1 = TDBFactory.createDataset() ;
Directory dir = new RAMDirectory() ;
- EntityDefinition eDef = new EntityDefinition("iri", "text", "graph", RDFS.label.asNode()) ;
+ EntityDefinition eDef = new EntityDefinition("iri", "text");
+ eDef.setGraphField("graph");
+ eDef.setPrimaryPredicate(RDFS.label.asNode());
eDef.set("comment", RDFS.comment.asNode()) ; // some tests require indexing rdfs:comment
- TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
+ TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
dataset = TextDatasetFactory.create(ds1, tidx) ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 0219675..6d1cb25 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -31,6 +31,7 @@ import org.junit.runners.Suite.SuiteClasses ;
TestBuildTextDataset.class
, TestDatasetWithLuceneTextIndex.class
, TestDatasetWithLuceneMultilingualTextIndex.class
+ , TestDatasetWithLuceneTextIndexWithLangField.class
, TestDatasetWithLuceneGraphTextIndex.class
// Embedded solr not supported
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
index 2c3564d..02d02f9 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
@@ -110,13 +110,14 @@ public class TestBuildTextDataset extends BaseTest
Dataset ds1 = DatasetFactory.createMem() ;
// Define the index mapping
- EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+ EntityDefinition entDef = new EntityDefinition("uri", "text");
+ entDef.setPrimaryPredicate(RDFS.label.asNode());
// Lucene, in memory.
Directory dir = new RAMDirectory() ;
// Join together into a dataset
- Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
+ Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef)) ;
return ds ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
index 58a78f1..53e2426 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
@@ -57,7 +57,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
"",
"[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
"text:TextDataset rdfs:subClassOf ja:RDFDataset .",
- "text:TextIndexLuceneMultilingual rdfs:subClassOf text:TextIndex .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
":" + SPEC_ROOT_LOCAL,
" a text:TextDataset ;",
@@ -74,8 +74,9 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
".",
"",
":indexLucene",
- " a text:TextIndexLuceneMultilingual ;",
+ " a text:TextIndexLucene ;",
" text:directory \"mem\" ;",
+ " text:multilingualSupport true ;",
" text:entityMap :entMap ;",
" .",
"",
@@ -83,6 +84,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" a text:EntityMap ;",
" text:entityField \"uri\" ;",
" text:defaultField \"label\" ;",
+ " text:langField \"lang\" ;",
" text:map (",
" [ text:field \"label\" ; text:predicate rdfs:label ]",
" [ text:field \"comment\" ; text:predicate rdfs:comment ]",
@@ -113,7 +115,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
QUERY_PROLOG,
"SELECT ?s",
"WHERE {",
- " ?s text:query ( rdfs:label \"book\" \"lang:en\" 10 ) .",
+ " ?s text:query ( rdfs:label 'book' 'lang:en' 10 ) .",
"}"
);
doTestSearch(turtle, queryString, new HashSet<String>());
@@ -131,7 +133,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" rdfs:label 'Er schluckte gift'@de",
"."
);
- // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
String queryString = StrUtils.strjoinNL(
QUERY_PROLOG,
"SELECT ?s",
@@ -156,7 +157,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" rdfs:label 'Er schluckte gift'@de",
"."
);
- // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
String queryString = StrUtils.strjoinNL(
QUERY_PROLOG,
"SELECT ?s",
@@ -177,7 +177,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" rdfs:label 'I met some engineers'@en",
"."
);
- // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
String queryString = StrUtils.strjoinNL(
QUERY_PROLOG,
"SELECT ?s",
@@ -191,6 +190,29 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
}
@Test
+ public void testRetrievingUnlocalizedResource(){
+ // only the resource whose label carries no language tag is expected to match 'lang:none'
+ Set<String> expectedURIs = new HashSet<>(Arrays.asList("http://example.org/data/resource/testUnlocalizedResource")) ;
+ final String data = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testLocalizedResource>",
+ " rdfs:label 'A localized text'@en",
+ ".",
+ "<" + RESOURCE_BASE + "testUnlocalizedResource>",
+ " rdfs:label 'An unlocalized text'",
+ "."
+ );
+ String sparql = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'text' 'lang:none' 10 ) .",
+ "}"
+ );
+ doTestSearch(data, sparql, expectedURIs);
+ }
+
+ @Test
public void testRetrievingSKOSConcepts() {
String queryString = StrUtils.strjoinNL(
"PREFIX text: <http://jena.apache.org/text#>",
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
new file mode 100644
index 0000000..9d99a29
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.Dataset;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Resource;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+public class TestDatasetWithLuceneTextIndexWithLangField extends AbstractTestDatasetWithTextIndex {
+
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ private static final String SPEC;
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory \"mem\" ;",
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:langField \"language\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; text:predicate rdfs:label ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ @Before
+ public void before() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+ @After
+ public void after() {
+ dataset.close();
+ }
+
+ @Test
+ public void testLiteralLanguageSearch(){
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "ParisInEnglish>",
+ " rdfs:label 'Paris, capital of France'@en",
+ ".",
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "ParisInFrench>",
+ " rdfs:label 'Paris, capitale de la France'@fr",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'paris' 'lang:en' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/ParisInEnglish")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
index dc02671..6e743a2 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
@@ -48,7 +48,9 @@ public class TestLuceneWithMultipleThreads
private static final EntityDefinition entDef;
static {
- entDef = new EntityDefinition("uri", "label", "graph", RDFS.label.asNode());
+ entDef = new EntityDefinition("uri", "label");
+ entDef.setGraphField("graph");
+ entDef.setPrimaryPredicate(RDFS.label.asNode());
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
entDef.setAnalyzer("label", analyzer);
}
@@ -56,7 +58,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testReadInMiddleOfWrite() throws InterruptedException, ExecutionException
{
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), new TextIndexConfig(entDef));
final Dataset ds = DatasetFactory.create(dsg);
final ExecutorService execService = Executors.newSingleThreadExecutor();
final Future<?> f = execService.submit(new Runnable()
@@ -112,7 +114,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testWriteInMiddleOfRead() throws InterruptedException, ExecutionException
{
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), new TextIndexConfig(entDef));
final int numReads = 10;
final Dataset ds = DatasetFactory.create(dsg);
final ExecutorService execService = Executors.newFixedThreadPool(10);
@@ -180,7 +182,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testIsolation() throws InterruptedException, ExecutionException {
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), entDef, null);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), new TextIndexConfig(entDef));
final int numReaders = 2;
final List<Future<?>> futures = new ArrayList<Future<?>>(numReaders);
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
index f3307f0..fa8a08a 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
@@ -38,8 +38,9 @@ public class TestTextTDB extends BaseTest
private static Dataset create() {
Dataset ds1 = TDBFactory.createDataset() ;
Directory dir = new RAMDirectory() ;
- EntityDefinition eDef = new EntityDefinition("iri", "text", RDFS.label) ;
- TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
+ EntityDefinition eDef = new EntityDefinition("iri", "text");
+ eDef.setPrimaryPredicate(RDFS.label);
+ TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
Dataset ds = TextDatasetFactory.create(ds1, tidx) ;
return ds ;
}
[4/6] jena git commit: Merge branch 'upstream/master' into
jena-text-ml-single-index
Posted by an...@apache.org.
Merge branch 'upstream/master' into jena-text-ml-single-index
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/ed717028
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/ed717028
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/ed717028
Branch: refs/heads/master
Commit: ed7170283975fbed9582caa6347016432e046618
Parents: 1a57c9d 8d5f1cb
Author: Alexis Miara <al...@hotmail.com>
Authored: Tue May 19 14:47:19 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Tue May 19 14:47:19 2015 -0400
----------------------------------------------------------------------
.../update/UpdateExecuteOperations.java | 17 +-
.../arq/examples/update/UpdateProgrammatic.java | 4 +-
.../arq/examples/update/UpdateReadFromFile.java | 4 +-
.../src/main/java/arq/cmdline/CmdUpdate.java | 19 +-
.../src/main/java/arq/cmdline/ModDataset.java | 7 +-
.../main/java/arq/cmdline/ModGraphStore.java | 95 ---
jena-arq/src/main/java/arq/load.java | 4 +-
jena-arq/src/main/java/arq/update.java | 69 +-
.../org/apache/jena/atlas/json/JsonArray.java | 2 +-
.../atlas/json/io/parser/TokenizerJSON.java | 697 +++++++++----------
.../java/org/apache/jena/query/Dataset.java | 8 +-
.../main/java/org/apache/jena/query/Query.java | 17 +
.../apache/jena/riot/out/NodeFormatterBase.java | 2 +-
.../apache/jena/riot/thrift/ThriftConvert.java | 8 +
.../apache/jena/sparql/core/VarExprList.java | 12 +-
.../sparql/core/assembler/AssemblerUtils.java | 1 -
.../core/assembler/GraphStoreAssembler.java | 47 --
.../apache/jena/sparql/engine/http/Service.java | 1 -
.../apache/jena/sparql/lang/arq/ARQParser.java | 2 +-
.../jena/sparql/modify/GraphStoreBasic.java | 19 -
.../jena/sparql/modify/GraphStoreNull.java | 20 +-
.../jena/sparql/modify/GraphStoreWrapper.java | 13 -
.../jena/sparql/modify/UpdateEngineBase.java | 9 +-
.../jena/sparql/modify/UpdateEngineFactory.java | 6 +-
.../jena/sparql/modify/UpdateEngineMain.java | 31 +-
.../sparql/modify/UpdateEngineNonStreaming.java | 32 +-
.../sparql/modify/UpdateEngineRegistry.java | 6 +-
.../jena/sparql/modify/UpdateEngineWorker.java | 117 ++--
.../jena/sparql/modify/UpdateProcessRemote.java | 3 +-
.../sparql/modify/UpdateProcessRemoteBase.java | 22 +-
.../sparql/modify/UpdateProcessRemoteForm.java | 3 +-
.../jena/sparql/modify/UpdateProcessorBase.java | 23 +-
.../modify/UpdateProcessorStreamingBase.java | 42 +-
.../java/org/apache/jena/update/GraphStore.java | 15 +-
.../apache/jena/update/GraphStoreFactory.java | 7 +
.../org/apache/jena/update/UpdateAction.java | 172 +++--
.../jena/update/UpdateExecutionFactory.java | 193 +++--
.../org/apache/jena/update/UpdateProcessor.java | 13 +-
.../jena/update/UpdateProcessorStreaming.java | 9 +-
.../jena/web/DatasetGraphAccessorHTTP.java | 3 +-
jena-arq/src/main/java/riotcmd/utf8.java | 48 +-
.../query/TestParameterizedSparqlString.java | 3 +-
.../jena/riot/lang/TestPipedRDFIterators.java | 6 +-
.../jena/riot/system/TestIO_JenaReaders.java | 4 +-
.../jena/riot/system/TestIO_JenaWriters.java | 4 +-
.../apache/jena/riot/thrift/TS_RDFThrift.java | 1 -
.../apache/jena/riot/thrift/TestThriftTerm.java | 48 +-
.../sparql/core/AbstractTestDynamicDataset.java | 5 +-
.../sparql/core/TestDatasetGraphWithLock.java | 4 +-
.../jena/sparql/core/TestDynamicDatasetMem.java | 8 +-
.../sparql/engine/index/TestIndexTable.java | 4 +-
.../apache/jena/sparql/graph/GraphsTests.java | 33 +-
.../apache/jena/sparql/graph/TestDatasets.java | 20 +-
.../apache/jena/sparql/junit/EarlTestCase.java | 8 +-
.../org/apache/jena/sparql/junit/QueryTest.java | 10 +-
.../jena/sparql/junit/SurpressedTest.java | 2 +-
.../apache/jena/sparql/junit/SyntaxTest.java | 2 +-
.../jena/sparql/junit/SyntaxUpdateTest.java | 2 +-
.../jena/sparql/junit/TestSerialization.java | 2 +-
.../apache/jena/sparql/junit/UpdateTest.java | 6 +-
.../sparql/modify/AbstractTestUpdateBase.java | 50 +-
.../sparql/modify/AbstractTestUpdateGraph.java | 49 +-
.../modify/AbstractTestUpdateGraphMgt.java | 10 +-
.../jena/sparql/modify/TestUpdateGraphMem.java | 9 +-
.../sparql/modify/TestUpdateGraphMgtMem.java | 15 +-
.../sparql/modify/TestUpdateOperations.java | 17 +-
.../apache/jena/sparql/util/TestFmtUtils.java | 6 +-
.../apache/jena/atlas/io/CharStreamReader.java | 23 +-
.../main/java/org/apache/jena/atlas/io/IO.java | 13 +-
.../org/apache/jena/atlas/io/InStreamASCII.java | 4 +-
.../org/apache/jena/atlas/io/InStreamUTF8.java | 5 +-
.../apache/jena/atlas/io/IndentedWriter.java | 7 +-
.../apache/jena/atlas/io/PeekInputStream.java | 98 ++-
.../org/apache/jena/atlas/io/PeekReader.java | 3 +-
.../org/apache/jena/atlas/iterator/Iter.java | 152 +---
.../java/org/apache/jena/atlas/lib/Chars.java | 2 -
.../java/org/apache/jena/atlas/lib/IRILib.java | 1 -
.../jena/atlas/lib/TestDateTimeUtils.java | 10 +-
.../apache/jena/datatypes/xsd/XSDDatatype.java | 2 +-
.../org/apache/jena/n3/N3IndentedWriter.java | 1 -
.../org/apache/jena/n3/N3JenaWriterCommon.java | 17 +-
.../java/org/apache/jena/n3/N3JenaWriterPP.java | 10 +-
.../jena/n3/turtle/parser/TurtleParser.java | 7 +-
.../jena/ontology/impl/OWLLiteProfile.java | 2 -
.../rdfxml/xmlinput/impl/AbsXMLContext.java | 7 -
.../jena/reasoner/rulesys/FBRuleInfGraph.java | 4 +-
.../jena/reasoner/rulesys/builtins/Bound.java | 1 -
.../jena/reasoner/rulesys/builtins/Drop.java | 1 -
.../jena/reasoner/rulesys/builtins/Remove.java | 1 -
.../jena/reasoner/rulesys/builtins/Unbound.java | 1 -
.../jena/reasoner/rulesys/impl/Generator.java | 1 -
.../jena/reasoner/rulesys/impl/LPRuleStore.java | 1 -
.../reasoner/rulesys/impl/RuleClauseCode.java | 4 +-
.../jena/shared/impl/PrefixMappingImpl.java | 2 +-
.../jena/ontology/impl/TestAllDifferent.java | 2 +-
.../jena/ontology/impl/TestClassExpression.java | 112 +--
.../jena/ontology/impl/TestIndividual.java | 42 +-
.../apache/jena/ontology/impl/TestOntTools.java | 3 +-
.../apache/jena/ontology/impl/TestOntology.java | 8 +-
.../apache/jena/ontology/impl/TestProperty.java | 54 +-
.../apache/jena/ontology/impl/TestResource.java | 50 +-
.../model/test/AbstractContainerMethods.java | 2 +-
.../rdf/model/test/AbstractModelTestBase.java | 4 +-
.../jena/rdf/model/test/IsomorphicTests.java | 4 +-
.../jena/rdf/model/test/TestAddAndContains.java | 4 +-
.../jena/rdf/model/test/TestAddModel.java | 4 +-
.../jena/rdf/model/test/TestConcurrency.java | 4 +-
.../rdf/model/test/TestConcurrencyNesting.java | 2 +-
.../rdf/model/test/TestConcurrencyParallel.java | 2 +-
.../rdf/model/test/TestCopyInOutOfModel.java | 2 +-
.../org/apache/jena/rdf/model/test/TestGet.java | 4 +-
.../jena/rdf/model/test/TestIterators.java | 2 +-
.../jena/rdf/model/test/TestListStatements.java | 4 +-
.../jena/rdf/model/test/TestListSubjects.java | 2 +-
.../rdf/model/test/TestLiteralsInModel.java | 2 +-
.../jena/rdf/model/test/TestModelEvents.java | 2 +-
.../rdf/model/test/TestModelSetOperations.java | 4 +-
.../rdf/model/test/TestObjectOfProperties.java | 2 +-
.../apache/jena/rdf/model/test/TestObjects.java | 4 +-
.../rdf/model/test/TestReifiedStatements.java | 2 +-
.../rdf/model/test/TestResourceMethods.java | 2 +-
.../jena/rdf/model/test/TestSelectorUse.java | 2 +-
.../jena/rdf/model/test/TestSeqMethods.java | 2 +-
.../model/test/TestSimpleListStatements.java | 2 +-
.../jena/rdf/model/test/TestSimpleSelector.java | 2 +-
.../rdf/model/test/TestStatementCreation.java | 4 +-
.../rdf/model/test/TestStatementMethods.java | 2 +-
.../jena/rdfxml/xmloutput/PrettyWriterTest.java | 2 +-
.../jena/rdfxml/xmloutput/TestXMLFeatures.java | 4 +-
.../jena/propertytable/graph/GraphCSVTest.java | 4 +-
.../arq/querybuilder/AbstractQueryBuilder.java | 28 +-
.../jena/arq/querybuilder/AskBuilder.java | 12 +
.../jena/arq/querybuilder/ConstructBuilder.java | 31 +-
.../jena/arq/querybuilder/SelectBuilder.java | 31 +-
.../arq/querybuilder/clauses/SelectClause.java | 33 +
.../arq/querybuilder/clauses/WhereClause.java | 20 +
.../querybuilder/handlers/DatasetHandler.java | 41 +-
.../jena/arq/querybuilder/handlers/Handler.java | 1 +
.../querybuilder/handlers/SelectHandler.java | 48 +-
.../handlers/SolutionModifierHandler.java | 12 +-
.../arq/querybuilder/handlers/WhereHandler.java | 37 +-
.../jena/arq/AbstractRegexpBasedTest.java | 1 +
.../querybuilder/clauses/SelectClauseTest.java | 29 +
.../querybuilder/clauses/WhereClauseTest.java | 100 ++-
.../handlers/SelectHandlerTest.java | 25 +
.../querybuilder/handlers/WhereHandlerTest.java | 22 +
.../org/apache/jena/iri/TestIRIFactory.java | 2 +-
.../java/org/apache/jena/sdb/SDBFactory.java | 2 +-
.../apache/jena/sdb/modify/UpdateEngineSDB.java | 6 +-
.../apache/jena/sdb/store/DatasetGraphSDB.java | 17 -
.../jena/sdb/test/modify/TestSPARQLUpdate.java | 7 +-
.../sdb/test/modify/TestSPARQLUpdateMgt.java | 8 +-
.../apache/jena/tdb/modify/UpdateEngineTDB.java | 12 +-
.../apache/jena/tdb/store/DatasetGraphTDB.java | 14 -
.../transaction/DatasetGraphTransaction.java | 190 ++---
.../main/java/tdb/cmdline/ModTDBGraphStore.java | 51 --
jena-tdb/src/main/java/tdb/tdbupdate.java | 15 +-
.../jena/tdb/store/TestDynamicDatasetTDB.java | 3 +
.../apache/jena/tdb/store/Test_SPARQL_TDB.java | 3 +-
159 files changed, 1725 insertions(+), 1959 deletions(-)
----------------------------------------------------------------------
[6/6] jena git commit: Add back constructor to EntityDefinition for
compatibility.
Posted by an...@apache.org.
Add back constructor to EntityDefinition for compatibility.
Fix un-generics.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/66a1eda8
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/66a1eda8
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/66a1eda8
Branch: refs/heads/master
Commit: 66a1eda82eeee2d8f551fac06d6b0a2672decdc2
Parents: 086b05c
Author: Andy Seaborne <an...@apache.org>
Authored: Mon May 25 13:10:34 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Mon May 25 13:10:34 2015 +0100
----------------------------------------------------------------------
.../jena/query/text/EntityDefinition.java | 70 +++++++++++++++++---
.../org/apache/jena/query/text/TextQueryPF.java | 4 +-
.../apache/jena/query/text/analyzer/Util.java | 4 +-
...ractTestDatasetWithLuceneGraphTextIndex.java | 2 +-
.../jena/query/text/TestBuildTextDataset.java | 2 +-
.../text/TestLuceneWithMultipleThreads.java | 2 +-
6 files changed, 68 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
index 30b048a..2a68247 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
@@ -33,17 +33,16 @@ import org.apache.lucene.analysis.Analyzer ;
* Definition of a "document"
*/
public class EntityDefinition {
- private final Map<Node, String> predicateToField = new HashMap<>() ;
- private final Map<String, Analyzer> fieldToAnalyzer = new HashMap<>();
+ private final Map<Node, String> predicateToField = new HashMap<>() ;
+ private final Map<String, Analyzer> fieldToAnalyzer = new HashMap<>() ;
private final ListMultimap<String, Node> fieldToPredicate = ArrayListMultimap.create() ;
- private final Collection<String> fields = Collections.unmodifiableCollection(fieldToPredicate.keys()) ;
+ private final Collection<String> fields = Collections.unmodifiableCollection(fieldToPredicate.keys()) ;
// private final Collection<String> fields =
// Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
- private final String entityField ;
- private final String primaryField ;
- private String graphField ;
- private String langField ;
- //private final Node primaryPredicate ;
+ private final String entityField ;
+ private final String primaryField ;
+ private String graphField = null ;
+ private String langField ;
/**
* @param entityField
@@ -56,6 +55,61 @@ public class EntityDefinition {
this.primaryField = primaryField ;
}
+ /**
+ * @param entityField
+ * The entity being indexed (e.g. its URI).
+ * @param primaryField
+ * The primary/default field to search
+ * @param graphField
+ * The field that stores graph URI, or null
+ */
+ public EntityDefinition(String entityField, String primaryField, String graphField) {
+ this(entityField, primaryField) ;
+ setGraphField(graphField);
+ }
+
+ /**
+ * @param entityField
+ * The entity being indexed (e.g. its URI).
+ * @param primaryField
+ * The primary/default field to search
+ * @param primaryPredicate
+ * The property associated with the primary/default field
+ */
+ public EntityDefinition(String entityField, String primaryField, Resource primaryPredicate) {
+ this(entityField, primaryField) ;
+ setPrimaryPredicate(primaryPredicate);
+ }
+
+ /**
+ * @param entityField
+ * The entity being indexed (e.g. its URI).
+ * @param primaryField
+ * The primary/default field to search
+ * @param primaryPredicate
+ * The property associated with the primary/default field
+ */
+ public EntityDefinition(String entityField, String primaryField, Node primaryPredicate) {
+ this(entityField, primaryField) ;
+ setPrimaryPredicate(primaryPredicate);
+ }
+
+ /**
+ * @param entityField
+ * The entity being indexed (e.g. its URI).
+ * @param primaryField
+ * The primary/default field to search
+ * @param graphField
+ * The field that stores graph URI, or null
+ * @param primaryPredicate
+ * The property associated with the primary/default field
+ */
+ public EntityDefinition(String entityField, String primaryField, String graphField, Node primaryPredicate) {
+ this(entityField, primaryField) ;
+ setGraphField(graphField);
+ setPrimaryPredicate(primaryPredicate) ;
+ }
+
public String getEntityField() {
return entityField ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index d568232..81dc412 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -18,7 +18,6 @@
package org.apache.jena.query.text ;
-import java.util.Iterator;
import java.util.List ;
import org.apache.jena.atlas.iterator.Iter ;
@@ -110,8 +109,7 @@ public class TextQueryPF extends PropertyFunctionBase {
private String extractArg(String prefix, PropFuncArg argObject) {
String value = null;
int pos = 0;
- for (Iterator it = argObject.getArgList().iterator(); it.hasNext(); ) {
- Node node = (Node)it.next();
+ for (Node node : argObject.getArgList()) {
if (node.isLiteral()) {
String arg = node.getLiteral().toString();
if (arg.startsWith(prefix + ":")) {
http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 11dd683..c8e3490 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -26,7 +26,7 @@ import java.util.Hashtable;
public class Util {
- private static Hashtable<String, Class> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
+ private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
static {
@@ -48,7 +48,7 @@ public class Util {
Class<?> className = analyzersClasses.get(lang);
if (className == null)
return null;
- Constructor constructor = className.getConstructor(Version.class);
+ Constructor<?> constructor = className.getConstructor(Version.class);
Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
cache.put(lang, analyzer);
return analyzer;
http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
index 1670f63..720e372 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
@@ -38,7 +38,7 @@ public class AbstractTestDatasetWithLuceneGraphTextIndex extends AbstractTestDat
Directory dir = new RAMDirectory() ;
EntityDefinition eDef = new EntityDefinition("iri", "text");
eDef.setGraphField("graph");
- eDef.setPrimaryPredicate(RDFS.label.asNode());
+ eDef.setPrimaryPredicate(RDFS.label);
eDef.set("comment", RDFS.comment.asNode()) ; // some tests require indexing rdfs:comment
TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
dataset = TextDatasetFactory.create(ds1, tidx) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
index 02d02f9..2335e40 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
@@ -111,7 +111,7 @@ public class TestBuildTextDataset extends BaseTest
// Define the index mapping
EntityDefinition entDef = new EntityDefinition("uri", "text");
- entDef.setPrimaryPredicate(RDFS.label.asNode());
+ entDef.setPrimaryPredicate(RDFS.label);
// Lucene, in memory.
Directory dir = new RAMDirectory() ;
http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
index 6e743a2..5bbe7c3 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
@@ -50,7 +50,7 @@ public class TestLuceneWithMultipleThreads
static {
entDef = new EntityDefinition("uri", "label");
entDef.setGraphField("graph");
- entDef.setPrimaryPredicate(RDFS.label.asNode());
+ entDef.setPrimaryPredicate(RDFS.label);
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
entDef.setAnalyzer("label", analyzer);
}
[5/6] jena git commit: Merge commit 'refs/pull/64/head' of
github.com:apache/jena
Posted by an...@apache.org.
Merge commit 'refs/pull/64/head' of github.com:apache/jena
This closes #64.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/086b05c8
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/086b05c8
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/086b05c8
Branch: refs/heads/master
Commit: 086b05c8450ee08005ba18861502280fea17c443
Parents: dc19466 ed71702
Author: Andy Seaborne <an...@apache.org>
Authored: Mon May 25 12:55:13 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Mon May 25 12:55:13 2015 +0100
----------------------------------------------------------------------
.../main/java/examples/JenaTextExample1.java | 6 +-
.../java/org/apache/jena/query/text/Entity.java | 9 +-
.../jena/query/text/EntityDefinition.java | 70 ++----
.../jena/query/text/TextDatasetFactory.java | 50 ++--
.../apache/jena/query/text/TextIndexConfig.java | 61 +++++
.../apache/jena/query/text/TextIndexLucene.java | 68 ++++--
.../query/text/TextIndexLuceneMultilingual.java | 75 ++++++
.../apache/jena/query/text/TextQueryFuncs.java | 9 +-
.../org/apache/jena/query/text/TextQueryPF.java | 41 +++-
.../apache/jena/query/text/analyzer/Util.java | 96 ++++++++
.../assembler/EntityDefinitionAssembler.java | 8 +-
.../assembler/LocalizedAnalyzerAssembler.java | 60 +++++
.../query/text/assembler/TextAssembler.java | 1 +
.../assembler/TextIndexLuceneAssembler.java | 40 +++-
.../jena/query/text/assembler/TextVocab.java | 5 +
...ractTestDatasetWithLuceneGraphTextIndex.java | 6 +-
.../org/apache/jena/query/text/TS_Text.java | 3 +
.../jena/query/text/TestBuildTextDataset.java | 5 +-
.../text/TestDatasetWithLocalizedAnalyzer.java | 147 ++++++++++++
...tDatasetWithLuceneMultilingualTextIndex.java | 238 +++++++++++++++++++
...DatasetWithLuceneTextIndexWithLangField.java | 126 ++++++++++
.../text/TestLuceneWithMultipleThreads.java | 10 +-
.../org/apache/jena/query/text/TestTextTDB.java | 5 +-
jena-text/testing/TextQuery/data.skos | 36 +++
24 files changed, 1047 insertions(+), 128 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/086b05c8/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
----------------------------------------------------------------------