You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/05/25 14:10:54 UTC
[3/6] jena git commit: langField implementation to store lang tags of
literals + refactoring growing methods of TextDatasetFactory
langField implementation to store lang tags of literals
+ refactoring growing methods of TextDatasetFactory
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1a57c9d3
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1a57c9d3
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1a57c9d3
Branch: refs/heads/master
Commit: 1a57c9d35b9ecf17c7e65c6bf7f19951adc3e44f
Parents: 7ab59ed
Author: Alexis Miara <al...@hotmail.com>
Authored: Tue May 19 14:41:32 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Tue May 19 14:41:32 2015 -0400
----------------------------------------------------------------------
.../main/java/examples/JenaTextExample1.java | 6 +-
.../jena/query/text/EntityDefinition.java | 70 +++--------
.../org/apache/jena/query/text/LuceneUtil.java | 95 --------------
.../jena/query/text/TextDatasetFactory.java | 109 ++--------------
.../apache/jena/query/text/TextIndexConfig.java | 61 +++++++++
.../apache/jena/query/text/TextIndexLucene.java | 50 +++-----
.../query/text/TextIndexLuceneMultilingual.java | 41 +++---
.../org/apache/jena/query/text/TextQueryPF.java | 16 +--
.../apache/jena/query/text/analyzer/Util.java | 96 ++++++++++++++
.../assembler/EntityDefinitionAssembler.java | 8 +-
.../assembler/LocalizedAnalyzerAssembler.java | 4 +-
.../query/text/assembler/TextAssembler.java | 1 -
.../assembler/TextIndexLuceneAssembler.java | 40 ++++--
.../TextIndexLuceneMultilingualAssembler.java | 87 -------------
.../jena/query/text/assembler/TextVocab.java | 4 +-
...ractTestDatasetWithLuceneGraphTextIndex.java | 6 +-
.../org/apache/jena/query/text/TS_Text.java | 1 +
.../jena/query/text/TestBuildTextDataset.java | 5 +-
...tDatasetWithLuceneMultilingualTextIndex.java | 34 ++++-
...DatasetWithLuceneTextIndexWithLangField.java | 126 +++++++++++++++++++
.../text/TestLuceneWithMultipleThreads.java | 10 +-
.../org/apache/jena/query/text/TestTextTDB.java | 5 +-
22 files changed, 453 insertions(+), 422 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/examples/JenaTextExample1.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/examples/JenaTextExample1.java b/jena-text/src/main/java/examples/JenaTextExample1.java
index 6ad2c26..c273540 100644
--- a/jena-text/src/main/java/examples/JenaTextExample1.java
+++ b/jena-text/src/main/java/examples/JenaTextExample1.java
@@ -23,6 +23,7 @@ import org.apache.jena.atlas.logging.LogCtl ;
import org.apache.jena.query.* ;
import org.apache.jena.query.text.EntityDefinition ;
import org.apache.jena.query.text.TextDatasetFactory ;
+import org.apache.jena.query.text.TextIndexConfig;
import org.apache.jena.query.text.TextQuery ;
import org.apache.jena.rdf.model.Model ;
import org.apache.jena.riot.RDFDataMgr ;
@@ -58,13 +59,14 @@ public class JenaTextExample1
Dataset ds1 = DatasetFactory.createMem() ;
// Define the index mapping
- EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+ EntityDefinition entDef = new EntityDefinition("uri", "text");
+ entDef.setPrimaryPredicate(RDFS.label.asNode());
// Lucene, in memory.
Directory dir = new RAMDirectory();
// Join together into a dataset
- Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
+ Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef)) ;
return ds ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
index 2f15ffb..30b048a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
@@ -41,7 +41,8 @@ public class EntityDefinition {
// Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
private final String entityField ;
private final String primaryField ;
- private final String graphField ;
+ private String graphField ;
+ private String langField ;
//private final Node primaryPredicate ;
/**
@@ -51,67 +52,22 @@ public class EntityDefinition {
* The primary/default field to search
*/
public EntityDefinition(String entityField, String primaryField) {
- this(entityField, primaryField, (String)null) ;
- }
-
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param graphField
- * The field that stores graph URI, or null
- */
- public EntityDefinition(String entityField, String primaryField, String graphField) {
this.entityField = entityField ;
this.primaryField = primaryField ;
- this.graphField = graphField ;
}
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param primaryPredicate
- * The property associated with the primary/default field
- */
- public EntityDefinition(String entityField, String primaryField, Resource primaryPredicate) {
- this(entityField, primaryField, null, primaryPredicate.asNode()) ;
+ public String getEntityField() {
+ return entityField ;
}
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param primaryPredicate
- * The property associated with the primary/default field
- */
- public EntityDefinition(String entityField, String primaryField, Node primaryPredicate) {
- this(entityField, primaryField, null, primaryPredicate) ;
+ public void setPrimaryPredicate(Resource primaryPredicate) {
+ setPrimaryPredicate(primaryPredicate.asNode());
}
- /**
- * @param entityField
- * The entity being indexed (e.g. it's URI).
- * @param primaryField
- * The primary/default field to search
- * @param graphField
- * The field that stores graph URI, or null
- * @param primaryPredicate
- * The property associated with the primary/default field
- */
- public EntityDefinition(String entityField, String primaryField, String graphField, Node primaryPredicate) {
- this(entityField, primaryField, graphField) ;
+ public void setPrimaryPredicate(Node primaryPredicate) {
set(primaryField, primaryPredicate) ;
}
-
- public String getEntityField() {
- return entityField ;
- }
-
public void set(String field, Node predicate) {
predicateToField.put(predicate, field) ;
// Add uniquely.
@@ -149,6 +105,18 @@ public class EntityDefinition {
return graphField ;
}
+ public void setGraphField(String graphField) {
+ this.graphField = graphField;
+ }
+
+ public String getLangField() {
+ return langField;
+ }
+
+ public void setLangField(String langField) {
+ this.langField = langField;
+ }
+
public Collection<String> fields() {
return fields ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
deleted file mode 100644
index 050b6f3..0000000
--- a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.query.text;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.util.Version;
-import java.lang.reflect.Constructor;
-import java.util.Hashtable;
-
-public class LuceneUtil {
-
- private static Hashtable<String, Class> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
- private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
-
- static {
- initAnalyzerDefs();
- }
-
- public static Analyzer getLocalizedAnalyzer(String lang) {
- return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
- }
-
- public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
- if (lang == null)
- return null;
-
- if (cache.containsKey(lang))
- return cache.get(lang);
-
- try {
- Class<?> className = analyzersClasses.get(lang);
- if (className == null)
- return null;
- Constructor constructor = className.getConstructor(Version.class);
- Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
- cache.put(lang, analyzer);
- return analyzer;
- } catch (Exception e) {
- e.printStackTrace();
- return null;
- }
- }
-
- private static void initAnalyzerDefs() {
- analyzersClasses = new Hashtable<>();
- analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
- analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
- analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
- analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
- analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
- analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
- analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
- analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
- analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
- analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
- analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
- analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
- analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
- analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
- analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
- analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
- analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
- analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
- analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
- analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
- analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
- analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
- analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
- analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
- analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
- analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
- analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
- analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
- analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
- analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
- analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
- analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index dd48bfa..dc6a094 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -24,7 +24,6 @@ import org.apache.jena.query.text.assembler.TextVocab ;
import org.apache.jena.sparql.core.DatasetGraph ;
import org.apache.jena.sparql.core.assembler.AssemblerUtils ;
import org.apache.jena.sparql.util.Context ;
-import org.apache.lucene.analysis.Analyzer ;
import org.apache.lucene.store.Directory ;
import org.apache.solr.client.solrj.SolrServer ;
@@ -88,80 +87,28 @@ public class TextDatasetFactory
* Create a Lucene TextIndex
*
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ * @param config The config definition for the index instantiation.
*/
- public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
+ public static TextIndex createLuceneIndex(Directory directory, TextIndexConfig config)
{
- TextIndex index = new TextIndexLucene(directory, def, queryAnalyzer) ;
+ TextIndex index;
+ if (config.isMultilingualSupport())
+ index = new TextIndexLuceneMultilingual(directory, config) ;
+ else
+ index = new TextIndexLucene(directory, config) ;
return index ;
}
/**
- * Create a Lucene TextIndex
- *
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
- {
- TextIndex index = new TextIndexLucene(directory, def, analyzer, queryAnalyzer) ;
- return index ;
- }
-
- /**
- * Create a multilingual Lucene TextIndex
- *
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- */
- public static TextIndex createLuceneIndexMultilingual(Directory directory, EntityDefinition def)
- {
- TextIndex index = new TextIndexLuceneMultilingual(directory, def) ;
- return index ;
- }
-
- /**
- * Create a text-indexed dataset, using Lucene
- *
- * @param base the base Dataset
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
- {
- TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
- return create(base, index, true) ;
- }
-
- /**
* Create a text-indexed dataset, using Lucene
- *
- * @param base the base Dataset
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
- {
- TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
- return create(base, index, true) ;
- }
-
- /**
- * Create a multilingual text-indexed dataset, using Lucene
*
* @param base the base Dataset
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
+ * @param config The config definition for the index instantiation.
*/
- public static Dataset createLuceneMultilingual(Dataset base, Directory directory, EntityDefinition def)
+ public static Dataset createLucene(Dataset base, Directory directory, TextIndexConfig config)
{
- TextIndex index = createLuceneIndexMultilingual(directory, def) ;
+ TextIndex index = createLuceneIndex(directory, config) ;
return create(base, index, true) ;
}
@@ -170,44 +117,14 @@ public class TextDatasetFactory
*
* @param base the base DatasetGraph
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ * @param config The config definition for the index instantiation.
*/
- public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
+ public static DatasetGraph createLucene(DatasetGraph base, Directory directory, TextIndexConfig config)
{
- TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
+ TextIndex index = createLuceneIndex(directory, config) ;
return create(base, index, true) ;
}
- /**
- * Create a text-indexed dataset, using Lucene
- *
- * @param base the base DatasetGraph
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
- {
- TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
- return create(base, index, true) ;
- }
-
- /**
- * Create a multilingual text-indexed dataset, using Lucene
- *
- * @param base the base DatasetGraph
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- */
- public static DatasetGraph createLuceneMultilingual(DatasetGraph base, Directory directory, EntityDefinition def)
- {
- TextIndex index = createLuceneIndexMultilingual(directory, def) ;
- return create(base, index, true) ;
- }
-
-
/** Create a Solr TextIndex */
public static TextIndex createSolrIndex(SolrServer server, EntityDefinition entMap)
{
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
new file mode 100644
index 0000000..feeb324
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.lucene.analysis.Analyzer;
+
+public class TextIndexConfig {
+
+ EntityDefinition entDef;
+ Analyzer analyzer;
+ Analyzer queryAnalyzer;
+ boolean multilingualSupport;
+
+ public TextIndexConfig(EntityDefinition entDef) {
+ this.entDef = entDef;
+ }
+
+ public EntityDefinition getEntDef() {
+ return entDef;
+ }
+
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ public void setAnalyzer(Analyzer analyzer) {
+ this.analyzer = analyzer;
+ }
+
+ public Analyzer getQueryAnalyzer() {
+ return queryAnalyzer;
+ }
+
+ public void setQueryAnalyzer(Analyzer queryAnalyzer) {
+ this.queryAnalyzer = queryAnalyzer;
+ }
+
+ public boolean isMultilingualSupport() {
+ return multilingualSupport;
+ }
+
+ public void setMultilingualSupport(boolean multilingualSupport) {
+ this.multilingualSupport = multilingualSupport;
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index abb9466..cd9ea2f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -87,42 +87,29 @@ public class TextIndexLucene implements TextIndex {
* Constructs a new TextIndexLucene.
*
* @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
+ * @param config The config definition for the index instantiation.
*/
- public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer queryAnalyzer) {
- this(directory, def, null, queryAnalyzer);
- }
-
- /**
- * Constructs a new TextIndexLucene.
- *
- * @param directory The Lucene Directory for the index
- * @param def The EntityDefinition that defines how entities are stored in the index
- * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
- * @param queryAnalyzer The analyzer to be used to find terms in the query text. If null, then the analyzer defined by the EntityDefinition will be used.
- */
- public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer) {
+ public TextIndexLucene(Directory directory, TextIndexConfig config) {
this.directory = directory ;
- this.docDef = def ;
+ this.docDef = config.getEntDef() ;
// create the analyzer as a wrapper that uses KeywordAnalyzer for
// entity and graph fields and StandardAnalyzer for all other
Map<String, Analyzer> analyzerPerField = new HashMap<>() ;
- analyzerPerField.put(def.getEntityField(), new KeywordAnalyzer()) ;
- if ( def.getGraphField() != null )
- analyzerPerField.put(def.getGraphField(), new KeywordAnalyzer()) ;
+ analyzerPerField.put(docDef.getEntityField(), new KeywordAnalyzer()) ;
+ if ( docDef.getGraphField() != null )
+ analyzerPerField.put(docDef.getGraphField(), new KeywordAnalyzer()) ;
- for (String field : def.fields()) {
- Analyzer _analyzer = def.getAnalyzer(field);
+ for (String field : docDef.fields()) {
+ Analyzer _analyzer = docDef.getAnalyzer(field);
if (_analyzer != null) {
analyzerPerField.put(field, _analyzer);
}
}
this.analyzer = new PerFieldAnalyzerWrapper(
- (null != analyzer) ? analyzer : new StandardAnalyzer(VER), analyzerPerField) ;
- this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : this.analyzer ;
+ (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
+ this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
openIndexWriter();
}
@@ -246,18 +233,17 @@ public class TextIndexLucene implements TextIndex {
doc.add(gField) ;
}
- for ( Field field : buildContentFields(entity) )
- doc.add(field);
+ String langField = docDef.getLangField() ;
- return doc ;
- }
-
- protected List<Field> buildContentFields(Entity entity) {
- List<Field> list = new ArrayList<>();
for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
- list.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
+ doc.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
+ if (langField != null) {
+ String lang = entity.getLanguage();
+ if (lang != null && !"".equals(lang))
+ doc.add(new Field(docDef.getLangField(), lang, StringField.TYPE_STORED));
+ }
}
- return list;
+ return doc ;
}
@Override
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index cdf7876..ce20294 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -18,29 +18,37 @@
package org.apache.jena.query.text;
+import org.apache.jena.query.text.analyzer.Util;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import java.io.IOException;
-import java.util.List;
public class TextIndexLuceneMultilingual extends TextIndexLucene {
- public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
- super(directory, def, null) ;
+ /**
+ * Constructs a new TextIndexLuceneMultilingual.
+ *
+ * @param directory The Lucene Directory for the index
+ * @param config The config definition for the index instantiation.
+ */
+ public TextIndexLuceneMultilingual(Directory directory, TextIndexConfig config) {
+ super(directory, config) ;
+
+ //multilingual index cannot work without lang field
+ if (config.getEntDef().getLangField() == null)
+ config.getEntDef().setLangField("lang");
}
@Override
protected void updateDocument(Entity entity) throws IOException {
Document doc = doc(entity);
Term term = new Term(getDocDef().getEntityField(), entity.getId());
- Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ Analyzer analyzer = Util.getLocalizedAnalyzer(entity.getLanguage());
if (analyzer == null)
analyzer = getAnalyzer();
getIndexWriter().updateDocument(term, doc, analyzer) ;
@@ -49,28 +57,19 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
@Override
protected void addDocument(Entity entity) throws IOException {
Document doc = doc(entity) ;
- Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ Analyzer analyzer = Util.getLocalizedAnalyzer(entity.getLanguage());
if (analyzer == null)
analyzer = getAnalyzer();
getIndexWriter().addDocument(doc, analyzer) ;
}
@Override
- protected List<Field> buildContentFields(Entity entity) {
- List<Field> list = super.buildContentFields(entity);
- String lang = entity.getLanguage();
- if (lang == null || "".equals(lang))
- lang = "undef";
- list.add( new Field("lang", lang, StringField.TYPE_STORED ) );
- return list;
- }
-
- @Override
protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
- String lang = queryString.substring( queryString.lastIndexOf(":") + 1);
- if (!"undef".equals(lang))
- analyzer = LuceneUtil.getLocalizedAnalyzer(lang);
-
+ if (queryString.contains(getDocDef().getLangField() + ":")) {
+ String lang = queryString.substring(queryString.lastIndexOf(":") + 1);
+ if (!"*".equals(lang))
+ analyzer = Util.getLocalizedAnalyzer(lang);
+ }
return super.preParseQuery(queryString, primaryField, analyzer);
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index 4fac00b..d568232 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -72,11 +72,9 @@ public class TextQueryPF extends PropertyFunctionBase {
throw new QueryBuildException("Subject is not a single node: " + argSubject) ;
if (argObject.isList()) {
- //extract of extra lang arg if present and if is usable (multilingual index).
+ //extract the extra lang arg if present and usable.
//arg is removed from the list to avoid conflict with order and args length
langArg = extractArg("lang", argObject);
- if (langArg == null && server instanceof TextIndexLuceneMultilingual)
- langArg = "undef";
List<Node> list = argObject.getArgList() ;
if (list.size() == 0)
@@ -210,10 +208,14 @@ public class TextQueryPF extends PropertyFunctionBase {
}
}
- //for multilingual index
- if (langArg != null) {
- String qs2 = "lang:" + langArg;
- queryString = "(" + queryString + ") AND " + qs2 ;
+ //for language-based search extension
+ if (server.getDocDef().getLangField() != null) {
+ String field = server.getDocDef().getLangField();
+ if (langArg != null) {
+ String qs2 = !"none".equals(langArg)?
+ field + ":" + langArg : "-" + field + ":*";
+ queryString = "(" + queryString + ") AND " + qs2;
+ }
}
Explain.explain(execCxt.getContext(), "Text query: "+queryString) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
new file mode 100644
index 0000000..11dd683
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer;
+
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.Version;
+import java.lang.reflect.Constructor;
+import java.util.Hashtable;
+
+public class Util {
+
+ private static Hashtable<String, Class> analyzersClasses; //maps ISO 639-1 two-letter language codes to the matching Lucene analyzer classes
+ private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //caches one analyzer instance per language to avoid repeated instantiation
+
+ static {
+ initAnalyzerDefs();
+ }
+
+ public static Analyzer getLocalizedAnalyzer(String lang) {
+ return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+ }
+
+ public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
+ if (lang == null)
+ return null;
+
+ if (cache.containsKey(lang))
+ return cache.get(lang);
+
+ try {
+ Class<?> className = analyzersClasses.get(lang);
+ if (className == null)
+ return null;
+ Constructor constructor = className.getConstructor(Version.class);
+ Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
+ cache.put(lang, analyzer);
+ return analyzer;
+ } catch (Exception e) {
+ e.printStackTrace();
+ return null;
+ }
+ }
+
+ private static void initAnalyzerDefs() {
+ analyzersClasses = new Hashtable<>();
+ analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
+ analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
+ analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
+ analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
+ analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
+ analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
+ analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
+ analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
+ analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
+ analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
+ analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
+ analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
+ analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
+ analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
+ analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
+ analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
+ analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
+ analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
+ analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
+ analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
+ analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
+ analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
+ analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
+ analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
+ analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
+ analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
+ analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
+ analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
+ analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
+ analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
index ca66f27..7604822 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
@@ -66,6 +66,9 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
" OPTIONAL {" ,
" ?eMap :graphField ?graphField" ,
" }",
+ " OPTIONAL {" ,
+ " ?eMap :langField ?langField" ,
+ " }",
"}") ;
ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1) ;
pss.setIri("eMap", root.getURI()) ;
@@ -87,6 +90,7 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
QuerySolution qsol1 = results.get(0) ;
String entityField = qsol1.getLiteral("entityField").getLexicalForm() ;
String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
+ String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null ;
Multimap<String, Node> mapDefs = HashMultimap.create() ;
@@ -155,7 +159,9 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
throw new TextIndexException("No definition of primary field '"+defaultField+"'") ;
}
- EntityDefinition docDef = new EntityDefinition(entityField, defaultField, graphField) ;
+ EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
+ docDef.setGraphField(graphField);
+ docDef.setLangField(langField);
for ( String f : mapDefs.keys() ) {
for ( Node p : mapDefs.get(f))
docDef.set(f, p) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
index 1e37c15..b9d83de 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
@@ -21,9 +21,9 @@ package org.apache.jena.query.text.assembler;
import org.apache.jena.assembler.Assembler;
import org.apache.jena.assembler.Mode;
import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.query.text.LuceneUtil;
import org.apache.jena.query.text.TextIndexException;
import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.query.text.analyzer.Util;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.lucene.analysis.Analyzer;
@@ -52,7 +52,7 @@ public class LocalizedAnalyzerAssembler extends AssemblerBase {
throw new TextIndexException("text:language property must be a string : " + node);
}
String lang = node.toString();
- return LuceneUtil.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+ return Util.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
} else {
return new StandardAnalyzer(TextIndexLucene.VER);
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 790dac7..021c003 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -30,7 +30,6 @@ public class TextAssembler
Assembler.general.implementWith(TextVocab.entityMap, new EntityDefinitionAssembler()) ;
Assembler.general.implementWith(TextVocab.textIndexSolr, new TextIndexSolrAssembler()) ;
Assembler.general.implementWith(TextVocab.textIndexLucene, new TextIndexLuceneAssembler()) ;
- Assembler.general.implementWith(TextVocab.textIndexLuceneMultilingual, new TextIndexLuceneMultilingualAssembler()) ;
Assembler.general.implementWith(TextVocab.standardAnalyzer, new StandardAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.simpleAnalyzer, new SimpleAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.keywordAnalyzer, new KeywordAnalyzerAssembler()) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 361841c..abc6c97 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -18,10 +18,6 @@
package org.apache.jena.query.text.assembler ;
-import static org.apache.jena.query.text.assembler.TextVocab.pDirectory ;
-import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap ;
-import static org.apache.jena.query.text.assembler.TextVocab.pQueryAnalyzer ;
-
import java.io.File ;
import java.io.IOException ;
@@ -30,10 +26,7 @@ import org.apache.jena.assembler.Mode ;
import org.apache.jena.assembler.assemblers.AssemblerBase ;
import org.apache.jena.atlas.io.IO ;
import org.apache.jena.atlas.lib.IRILib ;
-import org.apache.jena.query.text.EntityDefinition ;
-import org.apache.jena.query.text.TextDatasetFactory ;
-import org.apache.jena.query.text.TextIndex ;
-import org.apache.jena.query.text.TextIndexException ;
+import org.apache.jena.query.text.*;
import org.apache.jena.rdf.model.RDFNode ;
import org.apache.jena.rdf.model.Resource ;
import org.apache.jena.rdf.model.Statement ;
@@ -43,6 +36,8 @@ import org.apache.lucene.store.Directory ;
import org.apache.lucene.store.FSDirectory ;
import org.apache.lucene.store.RAMDirectory ;
+import static org.apache.jena.query.text.assembler.TextVocab.*;
+
public class TextIndexLuceneAssembler extends AssemblerBase {
/*
<#index> a :TextIndexLucene ;
@@ -77,7 +72,18 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
File dir = new File(path) ;
directory = FSDirectory.open(dir) ;
}
-
+
+ Analyzer analyzer = null;
+ Statement analyzerStatement = root.getProperty(pAnalyzer);
+ if (null != analyzerStatement) {
+ RDFNode aNode = analyzerStatement.getObject();
+ if (! aNode.isResource()) {
+ throw new TextIndexException("Text analyzer property is not a resource : " + aNode);
+ }
+ Resource analyzerResource = (Resource) aNode;
+ analyzer = (Analyzer) a.open(analyzerResource);
+ }
+
Analyzer queryAnalyzer = null;
Statement queryAnalyzerStatement = root.getProperty(pQueryAnalyzer);
if (null != queryAnalyzerStatement) {
@@ -89,10 +95,24 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
queryAnalyzer = (Analyzer) a.open(analyzerResource);
}
+ boolean isMultilingualSupport = false;
+ Statement mlSupportStatement = root.getProperty(pMultilingualSupport);
+ if (null != mlSupportStatement) {
+ RDFNode mlsNode = mlSupportStatement.getObject();
+ if (! mlsNode.isLiteral()) {
+ throw new TextIndexException("text:multilingualSupport property must be a string : " + mlsNode);
+ }
+ isMultilingualSupport = mlsNode.asLiteral().getBoolean();
+ }
+
Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
EntityDefinition docDef = (EntityDefinition)a.open(r) ;
+ TextIndexConfig config = new TextIndexConfig(docDef);
+ config.setAnalyzer(analyzer);
+ config.setQueryAnalyzer(queryAnalyzer);
+ config.setMultilingualSupport(isMultilingualSupport);
- return TextDatasetFactory.createLuceneIndex(directory, docDef, queryAnalyzer) ;
+ return TextDatasetFactory.createLuceneIndex(directory, config) ;
} catch (IOException e) {
IO.exception(e) ;
return null ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
deleted file mode 100644
index a36fcbe..0000000
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.query.text.assembler;
-
-import org.apache.jena.assembler.Assembler;
-import org.apache.jena.assembler.Mode;
-import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.atlas.io.IO;
-import org.apache.jena.atlas.lib.IRILib;
-import org.apache.jena.query.text.EntityDefinition;
-import org.apache.jena.query.text.TextDatasetFactory;
-import org.apache.jena.query.text.TextIndex;
-import org.apache.jena.query.text.TextIndexException;
-import org.apache.jena.rdf.model.RDFNode;
-import org.apache.jena.rdf.model.Resource;
-import org.apache.jena.sparql.util.graph.GraphUtils;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.RAMDirectory;
-
-import java.io.File;
-import java.io.IOException;
-
-import static org.apache.jena.query.text.assembler.TextVocab.pDirectory;
-import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap;
-
-public class TextIndexLuceneMultilingualAssembler extends AssemblerBase {
- /*
- <#index> a :TextIndexLuceneMultilingual ;
- #text:directory "mem" ;
- #text:directory "DIR" ;
- text:directory <file:DIR> ;
- text:entityMap <#endMap> ;
- .
- */
-
- @SuppressWarnings("resource")
- @Override
- public TextIndex open(Assembler a, Resource root, Mode mode) {
- try {
- if ( !GraphUtils.exactlyOneProperty(root, pDirectory) )
- throw new TextIndexException("No 'text:directory' property on " + root) ;
-
- Directory directory ;
-
- RDFNode n = root.getProperty(pDirectory).getObject() ;
- if ( n.isLiteral() ) {
- String literalValue = n.asLiteral().getLexicalForm() ;
- if (literalValue.equals("mem")) {
- directory = new RAMDirectory() ;
- } else {
- File dir = new File(literalValue) ;
- directory = FSDirectory.open(dir) ;
- }
- } else {
- Resource x = n.asResource() ;
- String path = IRILib.IRIToFilename(x.getURI()) ;
- File dir = new File(path) ;
- directory = FSDirectory.open(dir) ;
- }
-
- Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
- EntityDefinition docDef = (EntityDefinition)a.open(r) ;
-
- return TextDatasetFactory.createLuceneIndexMultilingual(directory, docDef) ;
- } catch (IOException e) {
- IO.exception(e) ;
- return null ;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 79c223e..802990d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -36,10 +36,10 @@ public class TextVocab
public static final Resource textIndex = Vocab.resource(NS, "TextIndex") ;
public static final Resource textIndexSolr = Vocab.resource(NS, "TextIndexSolr") ;
public static final Resource textIndexLucene = Vocab.resource(NS, "TextIndexLucene") ;
- public static final Resource textIndexLuceneMultilingual = Vocab.resource(NS, "TextIndexLuceneMultilingual") ;
public static final Property pLanguage = Vocab.property(NS, "language") ;
public static final Property pServer = Vocab.property(NS, "server") ; // Solr
public static final Property pDirectory = Vocab.property(NS, "directory") ; // Lucene
+ public static final Property pMultilingualSupport = Vocab.property(NS, "multilingualSupport") ;
public static final Property pQueryAnalyzer = Vocab.property(NS, "queryAnalyzer") ;
public static final Property pEntityMap = Vocab.property(NS, "entityMap") ;
@@ -47,6 +47,8 @@ public class TextVocab
public static final Resource entityMap = Vocab.resource(NS, "EntityMap") ;
public static final Property pEntityField = Vocab.property(NS, "entityField") ;
public static final Property pDefaultField = Vocab.property(NS, "defaultField") ;
+ public static final Property pGraphField = Vocab.property(NS, "graphField") ;
+ public static final Property pLangField = Vocab.property(NS, "langField") ;
public static final Property pMap = Vocab.property(NS, "map") ;
public static final Property pField = Vocab.property(NS, "field") ;
public static final Property pPredicate = Vocab.property(NS, "predicate") ;
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
index 56a81b6..1670f63 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
@@ -36,9 +36,11 @@ public class AbstractTestDatasetWithLuceneGraphTextIndex extends AbstractTestDat
public void init() {
Dataset ds1 = TDBFactory.createDataset() ;
Directory dir = new RAMDirectory() ;
- EntityDefinition eDef = new EntityDefinition("iri", "text", "graph", RDFS.label.asNode()) ;
+ EntityDefinition eDef = new EntityDefinition("iri", "text");
+ eDef.setGraphField("graph");
+ eDef.setPrimaryPredicate(RDFS.label.asNode());
eDef.set("comment", RDFS.comment.asNode()) ; // some tests require indexing rdfs:comment
- TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
+ TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
dataset = TextDatasetFactory.create(ds1, tidx) ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 0219675..6d1cb25 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -31,6 +31,7 @@ import org.junit.runners.Suite.SuiteClasses ;
TestBuildTextDataset.class
, TestDatasetWithLuceneTextIndex.class
, TestDatasetWithLuceneMultilingualTextIndex.class
+ , TestDatasetWithLuceneTextIndexWithLangField.class
, TestDatasetWithLuceneGraphTextIndex.class
// Embedded solr not supported
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
index 2c3564d..02d02f9 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
@@ -110,13 +110,14 @@ public class TestBuildTextDataset extends BaseTest
Dataset ds1 = DatasetFactory.createMem() ;
// Define the index mapping
- EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+ EntityDefinition entDef = new EntityDefinition("uri", "text");
+ entDef.setPrimaryPredicate(RDFS.label.asNode());
// Lucene, in memory.
Directory dir = new RAMDirectory() ;
// Join together into a dataset
- Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
+ Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef)) ;
return ds ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
index 58a78f1..53e2426 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
@@ -57,7 +57,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
"",
"[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
"text:TextDataset rdfs:subClassOf ja:RDFDataset .",
- "text:TextIndexLuceneMultilingual rdfs:subClassOf text:TextIndex .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
":" + SPEC_ROOT_LOCAL,
" a text:TextDataset ;",
@@ -74,8 +74,9 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
".",
"",
":indexLucene",
- " a text:TextIndexLuceneMultilingual ;",
+ " a text:TextIndexLucene ;",
" text:directory \"mem\" ;",
+ " text:multilingualSupport true ;",
" text:entityMap :entMap ;",
" .",
"",
@@ -83,6 +84,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" a text:EntityMap ;",
" text:entityField \"uri\" ;",
" text:defaultField \"label\" ;",
+ " text:langField \"lang\" ;",
" text:map (",
" [ text:field \"label\" ; text:predicate rdfs:label ]",
" [ text:field \"comment\" ; text:predicate rdfs:comment ]",
@@ -113,7 +115,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
QUERY_PROLOG,
"SELECT ?s",
"WHERE {",
- " ?s text:query ( rdfs:label \"book\" \"lang:en\" 10 ) .",
+ " ?s text:query ( rdfs:label 'book' 'lang:en' 10 ) .",
"}"
);
doTestSearch(turtle, queryString, new HashSet<String>());
@@ -131,7 +133,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" rdfs:label 'Er schluckte gift'@de",
"."
);
- // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
String queryString = StrUtils.strjoinNL(
QUERY_PROLOG,
"SELECT ?s",
@@ -156,7 +157,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" rdfs:label 'Er schluckte gift'@de",
"."
);
- // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
String queryString = StrUtils.strjoinNL(
QUERY_PROLOG,
"SELECT ?s",
@@ -177,7 +177,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
" rdfs:label 'I met some engineers'@en",
"."
);
- // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
String queryString = StrUtils.strjoinNL(
QUERY_PROLOG,
"SELECT ?s",
@@ -191,6 +190,29 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
}
@Test
+ public void testRetrievingUnlocalizedResource(){
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testLocalizedResource>",
+ " rdfs:label 'A localized text'@en",
+ ".",
+ "<" + RESOURCE_BASE + "testUnlocalizedResource>",
+ " rdfs:label 'An unlocalized text'",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'text' 'lang:none' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testUnlocalizedResource")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+
+ @Test
public void testRetrievingSKOSConcepts() {
String queryString = StrUtils.strjoinNL(
"PREFIX text: <http://jena.apache.org/text#>",
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
new file mode 100644
index 0000000..9d99a29
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.Dataset;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Resource;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+public class TestDatasetWithLuceneTextIndexWithLangField extends AbstractTestDatasetWithTextIndex {
+
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ private static final String SPEC;
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory \"mem\" ;",
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:langField \"language\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; text:predicate rdfs:label ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ @Before
+ public void before() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+ @After
+ public void after() {
+ dataset.close();
+ }
+
+ @Test
+ public void testLiteralLanguageSearch(){
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "ParisInEnglish>",
+ " rdfs:label 'Paris, capital of France'@en",
+ ".",
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "ParisInFrench>",
+ " rdfs:label 'Paris, capitale de la France'@fr",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'paris' 'lang:en' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/ParisInEnglish")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
index dc02671..6e743a2 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
@@ -48,7 +48,9 @@ public class TestLuceneWithMultipleThreads
private static final EntityDefinition entDef;
static {
- entDef = new EntityDefinition("uri", "label", "graph", RDFS.label.asNode());
+ entDef = new EntityDefinition("uri", "label");
+ entDef.setGraphField("graph");
+ entDef.setPrimaryPredicate(RDFS.label.asNode());
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
entDef.setAnalyzer("label", analyzer);
}
@@ -56,7 +58,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testReadInMiddleOfWrite() throws InterruptedException, ExecutionException
{
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), new TextIndexConfig(entDef));
final Dataset ds = DatasetFactory.create(dsg);
final ExecutorService execService = Executors.newSingleThreadExecutor();
final Future<?> f = execService.submit(new Runnable()
@@ -112,7 +114,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testWriteInMiddleOfRead() throws InterruptedException, ExecutionException
{
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), new TextIndexConfig(entDef));
final int numReads = 10;
final Dataset ds = DatasetFactory.create(dsg);
final ExecutorService execService = Executors.newFixedThreadPool(10);
@@ -180,7 +182,7 @@ public class TestLuceneWithMultipleThreads
@Test
public void testIsolation() throws InterruptedException, ExecutionException {
- final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), entDef, null);
+ final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), new TextIndexConfig(entDef));
final int numReaders = 2;
final List<Future<?>> futures = new ArrayList<Future<?>>(numReaders);
http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
index f3307f0..fa8a08a 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
@@ -38,8 +38,9 @@ public class TestTextTDB extends BaseTest
private static Dataset create() {
Dataset ds1 = TDBFactory.createDataset() ;
Directory dir = new RAMDirectory() ;
- EntityDefinition eDef = new EntityDefinition("iri", "text", RDFS.label) ;
- TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
+ EntityDefinition eDef = new EntityDefinition("iri", "text");
+ eDef.setPrimaryPredicate(RDFS.label);
+ TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
Dataset ds = TextDatasetFactory.create(ds1, tidx) ;
return ds ;
}