You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/05/25 14:10:52 UTC

[1/6] jena git commit: Implementation of jena-text multilingual with a single index

Repository: jena
Updated Branches:
  refs/heads/master dc19466e7 -> 66a1eda82


Implementation of jena-text multilingual with a single index


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/9553c6b2
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/9553c6b2
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/9553c6b2

Branch: refs/heads/master
Commit: 9553c6b2c246bc9c05906096c1f56d65ba15fed8
Parents: 7ef374f
Author: Alexis Miara <al...@hotmail.com>
Authored: Wed May 13 11:23:56 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Wed May 13 11:23:56 2015 -0400

----------------------------------------------------------------------
 .../java/org/apache/jena/query/text/Entity.java |   9 +-
 .../org/apache/jena/query/text/LuceneUtil.java  | 150 +++++++++++++
 .../jena/query/text/TextDatasetFactory.java     | 101 ++++++++-
 .../org/apache/jena/query/text/TextIndex.java   |   3 +
 .../apache/jena/query/text/TextIndexLucene.java |  64 +++++-
 .../query/text/TextIndexLuceneMultilingual.java |  33 +++
 .../apache/jena/query/text/TextIndexSolr.java   |   5 +
 .../apache/jena/query/text/TextQueryFuncs.java  |   9 +-
 .../org/apache/jena/query/text/TextQueryPF.java |  41 +++-
 .../assembler/LocalizedAnalyzerAssembler.java   |  60 ++++++
 .../query/text/assembler/TextAssembler.java     |   2 +
 .../TextIndexLuceneMultilingualAssembler.java   |  87 ++++++++
 .../jena/query/text/assembler/TextVocab.java    |   3 +
 .../org/apache/jena/query/text/TS_Text.java     |   2 +
 .../text/TestDatasetWithLocalizedAnalyzer.java  | 147 +++++++++++++
 ...tDatasetWithLuceneMultilingualTextIndex.java | 216 +++++++++++++++++++
 jena-text/testing/TextQuery/data.skos           |  36 ++++
 17 files changed, 946 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
index d770c5a..c7757a3 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/Entity.java
@@ -25,13 +25,18 @@ public class Entity
 {
     private final String id ;
     private final String graph ;
+    private final String language ;
     private final Map<String, Object> map = new HashMap<>() ;
 
     public Entity(String entityId, String entityGraph) {
+        this(entityId, entityGraph, null);
+    }
+
+    public Entity(String entityId, String entityGraph, String lang) {
         this.id = entityId ;
         this.graph = entityGraph;
+        this.language = lang;
     }
-
     /** @deprecated Use {@linkplain #Entity(String, String)} */
     @Deprecated
     public Entity(String entityId)          { this(entityId, null) ; }
@@ -40,6 +45,8 @@ public class Entity
 
     public String getGraph()                { return graph ; }
 
+    public String getLanguage()                { return language ; }
+
     public void put(String key, Object value)
     { map.put(key, value) ; }
     

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
new file mode 100644
index 0000000..7fafc4c
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.Version;
+import java.lang.reflect.Constructor;
+import java.util.Hashtable;
+
+/** Resolves language tags to language-specific Lucene analyzers, keeping one cached instance per language. */
+public class LuceneUtil {
+
+    private static Hashtable<String, Class<? extends Analyzer>> analyzersClasses; // ISO 639-1 code -> Lucene analyzer class
+    private static final Hashtable<String, Analyzer> cache = new Hashtable<>(); // one shared analyzer instance per language
+
+    static { initAnalyzerDefs(); }
+
+    /** Same as {@link #getLocalizedAnalyzer(String, Version)}, passing {@code TextIndexLucene.VER} as the version. */
+    public static Analyzer getLocalizedAnalyzer(String lang) {
+        return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+    }
+
+    /** Returns the analyzer for a language tag, or null if the tag is unknown or the analyzer cannot be instantiated. */
+    public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
+        lang = getISO2Language(lang);
+        if (lang == null)
+            return null;
+        if (cache.containsKey(lang))
+            return cache.get(lang); // note: a cached instance is returned whatever version is requested
+        try {
+            Class<? extends Analyzer> className = analyzersClasses.get(lang);
+            if (className == null)
+                return null;
+            Constructor<? extends Analyzer> constructor = className.getConstructor(Version.class);
+            Analyzer analyzer = constructor.newInstance(ver);
+            cache.put(lang, analyzer);
+            return analyzer;
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null; // best effort: null tells the caller that no localized analyzer is available
+        }
+    }
+
+    /** Lower-cases the primary subtag; 2-letter codes are returned as-is, known 3-letter codes are mapped, else null. */
+    public static String getISO2Language(String lang) {
+        if (lang == null)
+            return null;
+        // limit 2 keeps element 0 present even for "-"; Locale.ROOT avoids locale-specific lower-casing (e.g. Turkish "I")
+        lang = lang.split("-", 2)[0].toLowerCase(java.util.Locale.ROOT);
+        if (lang.length() == 2)
+            return lang;
+        // three-letter codes (both alternative forms where two exist) mapped to the two-letter keys of the analyzer table
+        if ("ara".equals(lang)) return "ar";
+        if ("bul".equals(lang)) return "bg";
+        if ("cat".equals(lang)) return "ca";
+        if ("ces".equals(lang)) return "cs";
+        if ("cze".equals(lang)) return "cs";
+        if ("dan".equals(lang)) return "da";
+        if ("deu".equals(lang)) return "de";
+        if ("ger".equals(lang)) return "de";
+        if ("ell".equals(lang)) return "el";
+        if ("gre".equals(lang)) return "el";
+        if ("eng".equals(lang)) return "en";
+        if ("spa".equals(lang)) return "es";
+        if ("eus".equals(lang)) return "eu";
+        if ("baq".equals(lang)) return "eu";
+        if ("fas".equals(lang)) return "fa";
+        if ("per".equals(lang)) return "fa";
+        if ("fin".equals(lang)) return "fi";
+        if ("fra".equals(lang)) return "fr";
+        if ("fre".equals(lang)) return "fr";
+        if ("gle".equals(lang)) return "ga";
+        if ("glg".equals(lang)) return "gl";
+        if ("hin".equals(lang)) return "hi";
+        if ("hun".equals(lang)) return "hu";
+        if ("hye".equals(lang)) return "hy";
+        if ("arm".equals(lang)) return "hy";
+        if ("ind".equals(lang)) return "id";
+        if ("ita".equals(lang)) return "it";
+        if ("jpn".equals(lang)) return "ja"; // was "jp", which has no entry in the analyzer table (keyed by "ja")
+        if ("kor".equals(lang)) return "ko";
+        if ("lav".equals(lang)) return "lv";
+        if ("nld".equals(lang)) return "nl";
+        if ("dut".equals(lang)) return "nl";
+        if ("nor".equals(lang)) return "no";
+        if ("por".equals(lang)) return "pt";
+        if ("ron".equals(lang)) return "ro";
+        if ("rum".equals(lang)) return "ro";
+        if ("rus".equals(lang)) return "ru";
+        if ("swe".equals(lang)) return "sv";
+        if ("tha".equals(lang)) return "th";
+        if ("tur".equals(lang)) return "tr";
+        if ("zho".equals(lang)) return "zh";
+        if ("chi".equals(lang)) return "zh";
+        return null;
+    }
+
+    /** Populates the language-code to analyzer-class table; "ja", "ko" and "zh" all map to the CJK analyzer. */
+    private static void initAnalyzerDefs() {
+        analyzersClasses = new Hashtable<>();
+        analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
+        analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
+        analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
+        analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
+        analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
+        analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
+        analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
+        analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
+        analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
+        analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
+        analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
+        analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
+        analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
+        analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
+        analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
+        analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
+        analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
+        analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
+        analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
+        analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
+        analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+        analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+        analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
+        analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
+        analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
+        analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
+        analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
+        analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
+        analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
+        analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
+        analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
+        analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index 2b4e7ff..dd48bfa 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -83,49 +83,132 @@ public class TextDatasetFactory
         
         return dsgt ;
     }
-    
+
     /**
      * Create a Lucene TextIndex
-     * 
+     *
      * @param directory The Lucene Directory for the index
      * @param def The EntityDefinition that defines how entities are stored in the index
      * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
-     */ 
+     */
     public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
     {
         TextIndex index = new TextIndexLucene(directory, def, queryAnalyzer) ;
+        return index ;
+    }
+
+    /**
+     * Create a Lucene TextIndex
+     * 
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     */ 
+    public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
+    {
+        TextIndex index = new TextIndexLucene(directory, def, analyzer, queryAnalyzer) ;
         return index ; 
     }
 
-    /** 
+    /**
+     * Create a multilingual Lucene TextIndex
+     *
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     */
+    public static TextIndex createLuceneIndexMultilingual(Directory directory, EntityDefinition def)
+    {
+        TextIndex index = new TextIndexLuceneMultilingual(directory, def) ;
+        return index ;
+    }
+
+    /**
      * Create a text-indexed dataset, using Lucene
-     * 
+     *
      * @param base the base Dataset
      * @param directory The Lucene Directory for the index
      * @param def The EntityDefinition that defines how entities are stored in the index
      * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
-     */ 
+     */
     public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
     {
         TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
-        return create(base, index, true) ; 
+        return create(base, index, true) ;
     }
 
     /**
      * Create a text-indexed dataset, using Lucene
      * 
-     * @param base the base DatasetGraph
+     * @param base the base Dataset
      * @param directory The Lucene Directory for the index
      * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
      * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
      */ 
+    public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
+    {
+        TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
+        return create(base, index, true) ; 
+    }
+
+    /**
+     * Create a multilingual text-indexed dataset, using Lucene
+     *
+     * @param base the base Dataset
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     */
+    public static Dataset createLuceneMultilingual(Dataset base, Directory directory, EntityDefinition def)
+    {
+        TextIndex index = createLuceneIndexMultilingual(directory, def) ;
+        return create(base, index, true) ;
+    }
+
+    /**
+     * Create a text-indexed dataset, using Lucene
+     *
+     * @param base the base DatasetGraph
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     */
     public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
     {
         TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
+        return create(base, index, true) ;
+    }
+
+    /**
+     * Create a text-indexed dataset, using Lucene
+     * 
+     * @param base the base DatasetGraph
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     */ 
+    public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
+    {
+        TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
         return create(base, index, true) ; 
     }
 
-    /** Create a Solr TextIndex */ 
+    /**
+     * Create a multilingual text-indexed dataset, using Lucene
+     *
+     * @param base the base DatasetGraph
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     */
+    public static DatasetGraph createLuceneMultilingual(DatasetGraph base, Directory directory, EntityDefinition def)
+    {
+        TextIndex index = createLuceneIndexMultilingual(directory, def) ;
+        return create(base, index, true) ;
+    }
+
+
+    /** Create a Solr TextIndex */
     public static TextIndex createSolrIndex(SolrServer server, EntityDefinition entMap)
     {
         TextIndex index = new TextIndexSolr(server, entMap) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
index 69efb31..ffe92e7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
@@ -51,4 +51,7 @@ public interface TextIndex extends Closeable //, Transactional
     List<Node> query(String qs) ;
 
     EntityDefinition getDocDef() ;
+
+    //localization
+    boolean isMultilingual() ;
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 9107e86..004c242 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -85,12 +85,24 @@ public class TextIndexLucene implements TextIndex {
 
     /**
      * Constructs a new TextIndexLucene.
-     * 
+     *
      * @param directory The Lucene Directory for the index
      * @param def The EntityDefinition that defines how entities are stored in the index
      * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
      */
     public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer queryAnalyzer) {
+        this(directory, def, null, queryAnalyzer);
+    }
+
+    /**
+     * Constructs a new TextIndexLucene.
+     * 
+     * @param directory The Lucene Directory for the index
+     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
+     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     */
+    public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer) {
         this.directory = directory ;
         this.docDef = def ;
 
@@ -102,14 +114,15 @@ public class TextIndexLucene implements TextIndex {
             analyzerPerField.put(def.getGraphField(), new KeywordAnalyzer()) ;
 
         for (String field : def.fields()) {
-            Analyzer analyzer = def.getAnalyzer(field);
-            if (analyzer != null) {
-                analyzerPerField.put(field, analyzer);
+            Analyzer _analyzer = def.getAnalyzer(field);
+            if (_analyzer != null) {
+                analyzerPerField.put(field, _analyzer);
             }
         }
 
-        this.analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(VER), analyzerPerField) ;
-        this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : analyzer ;
+        this.analyzer = new PerFieldAnalyzerWrapper(
+                (null != analyzer) ? analyzer : new StandardAnalyzer(VER), analyzerPerField) ;
+        this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : this.analyzer ;
 
         openIndexWriter();
     }
@@ -194,8 +207,15 @@ public class TextIndexLucene implements TextIndex {
             log.debug("Update entity: " + entity) ;
         try {
             Document doc = doc(entity);
+            Analyzer analyzer = null;
+            if (isMultilingual())
+                analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
             Term term = new Term(docDef.getEntityField(), entity.getId());
-            indexWriter.updateDocument(term, doc);
+
+            if (analyzer != null)
+                indexWriter.updateDocument(term, doc, analyzer) ;
+            else //use the default one
+                indexWriter.updateDocument(term, doc);
         } catch (IOException e) {
             throw new TextIndexException(e) ;
         }
@@ -207,7 +227,14 @@ public class TextIndexLucene implements TextIndex {
             log.debug("Add entity: " + entity) ;
         try {
             Document doc = doc(entity) ;
-            indexWriter.addDocument(doc) ;
+            Analyzer analyzer = null;
+            if (isMultilingual())
+                analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+
+            if (analyzer != null)
+                indexWriter.addDocument(doc, analyzer) ;
+            else //use the default one
+                indexWriter.addDocument(doc) ;
         }
         catch (IOException e) {
             throw new TextIndexException(e) ;
@@ -228,6 +255,13 @@ public class TextIndexLucene implements TextIndex {
         for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
             Field field = new Field(e.getKey(), (String)e.getValue(), ftText) ;
             doc.add(field) ;
+            if (isMultilingual()) {
+                String lang =  entity.getLanguage();
+                if (lang == null || "".equals(lang))
+                    lang = "undef";
+                field = new Field("lang", lang, StringField.TYPE_STORED ) ;
+                doc.add(field) ;
+            }
         }
         return doc ;
     }
@@ -305,7 +339,14 @@ public class TextIndexLucene implements TextIndex {
 
     private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
-        Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+        Analyzer qAnalyzer = queryAnalyzer;
+        if (isMultilingual()) {//index and query analyzer must be the same
+            String lang = qs.substring( qs.lastIndexOf(":") + 1);
+            if (!"undef".equals(lang))
+                qAnalyzer = LuceneUtil.getLocalizedAnalyzer(lang);
+        }
+
+        Query query = parseQuery(qs, docDef.getPrimaryField(), qAnalyzer) ;
         if ( limit <= 0 )
             limit = MAX_N ;
         ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
@@ -329,6 +370,11 @@ public class TextIndexLucene implements TextIndex {
         return docDef ;
     }
 
+    @Override
+    public boolean isMultilingual() {
+        return false;
+    }
+
     private Node entryToNode(String v) {
         // TEMP
         return NodeFactoryExtra.createLiteralNode(v, null, null) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
new file mode 100644
index 0000000..86b34e6
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.lucene.store.Directory;
+
+public class TextIndexLuceneMultilingual extends TextIndexLucene {
+    // Lucene text index variant whose only difference from TextIndexLucene is that isMultilingual() returns true.
+    public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
+        super(directory, def, null) ; // null query analyzer: TextIndexLucene then falls back to its index analyzer
+    }
+
+    @Override
+    public boolean isMultilingual() {
+        return true; // enables the language-aware paths in TextIndexLucene (localized analyzers, "lang" field)
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
index 54a3263..5be898c 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
@@ -231,6 +231,11 @@ public class TextIndexSolr implements TextIndex
         return docDef ;
     }
 
+    @Override
+    public boolean isMultilingual() {
+        return false;
+    }
+
     private Node entryToNode(String v)
     {
         // TEMP

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
index 512297e..d628c4a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryFuncs.java
@@ -46,6 +46,12 @@ public class TextQueryFuncs {
         return nodeToString(g) ;
     }
 
+    /** Returns the language tag of a literal node, or null when the node is not a literal. */
+    public static String getLiteralLanguage(Node o) {
+        // getLiteral() is only meaningful on literal nodes, so check isLiteral() before dereferencing
+        return o.isLiteral() ? o.getLiteral().language() : null ;
+    }
+
     private static String nodeToString(Node n) {
         return (n.isURI() ) ? n.getURI() : "_:" + n.getBlankNodeLabel() ;
     }
@@ -77,7 +83,8 @@ public class TextQueryFuncs {
     
         String x = TextQueryFuncs.subjectToString(s) ;
         String graphText = TextQueryFuncs.graphNodeToString(g) ;
-        Entity entity = new Entity(x, graphText) ;
+        String language = TextQueryFuncs.getLiteralLanguage(o) ;
+        Entity entity = new Entity(x, graphText, language) ;
         String graphField = defn.getGraphField() ;
         if ( defn.getGraphField() != null )
             entity.put(graphField, graphText) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index 140ef09..cc7e4f6 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -18,6 +18,7 @@
 
 package org.apache.jena.query.text ;
 
+import java.util.Iterator;
 import java.util.List ;
 
 import org.apache.jena.atlas.iterator.Iter ;
@@ -57,6 +58,8 @@ public class TextQueryPF extends PropertyFunctionBase {
 
     public TextQueryPF() {}
 
+    private String langArg = null;
+
     @Override
     public void build(PropFuncArg argSubject, Node predicate, PropFuncArg argObject, ExecutionContext execCxt) {
         super.build(argSubject, predicate, argObject, execCxt) ;
@@ -69,6 +72,14 @@ public class TextQueryPF extends PropertyFunctionBase {
             throw new QueryBuildException("Subject is not a single node: " + argSubject) ;
 
         if (argObject.isList()) {
+            //extract of extra lang arg if present and if is usable (multilingual index).
+            //arg is removed from the list to avoid conflict with order and args length
+            if (server.isMultilingual()) {
+                langArg = extractArg("lang", argObject);
+                if (langArg == null)
+                    langArg = "undef";
+            }
+
             List<Node> list = argObject.getArgList() ;
             if (list.size() == 0)
                 throw new QueryBuildException("Zero-length argument list") ;
@@ -100,6 +111,26 @@ public class TextQueryPF extends PropertyFunctionBase {
         return null ;
     }
 
+    /** Removes the first literal argument of the form "prefix:value" from the argument list and returns value, or null. */
+    private String extractArg(String prefix, PropFuncArg argObject) {
+        String marker = prefix + ":";
+        for (Iterator<Node> it = argObject.getArgList().iterator(); it.hasNext(); ) {
+            Node node = it.next();
+            if (!node.isLiteral())
+                continue;
+            String arg = node.getLiteral().toString();
+            if (!arg.startsWith(marker))
+                continue;
+            String[] parts = arg.split(":");
+            // "prefix:" with no value splits into a single element; report a build error instead of an index error
+            if (parts.length < 2)
+                throw new QueryBuildException("Missing value for argument '" + prefix + "': " + arg) ;
+            it.remove(); // drop the consumed argument so the remaining positional arguments keep their order
+            return parts[1];
+        }
+        return null;
+    }
+
     @Override
     public QueryIterator exec(Binding binding, PropFuncArg argSubject, Node predicate, PropFuncArg argObject,
                               ExecutionContext execCxt) {
@@ -179,8 +210,14 @@ public class TextQueryPF extends PropertyFunctionBase {
                 String qs2 = server.getDocDef().getGraphField() + ":" + escaped ;
                 queryString = "(" + queryString + ") AND " + qs2 ;
             }
-        } 
-    
+        }
+
+        //for multilingual index
+        if (langArg != null) {
+            String qs2 = "lang:" + langArg;
+            queryString = "(" + queryString + ") AND " + qs2 ;
+        }
+
         Explain.explain(execCxt.getContext(), "Text query: "+queryString) ;
         if ( log.isDebugEnabled())
             log.debug("Text query: {} ({})", queryString,limit) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
new file mode 100644
index 0000000..1e37c15
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.query.text.LuceneUtil;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+/**
+ * Assembler that creates a Lucene {@link Analyzer} for the language named by
+ * the {@code text:language} property of the analyzer description.
+ * <p>
+ * Example configuration:
+ * <pre>
+ *    text:map (
+ *         [ text:field "text" ;
+ *           text:predicate rdfs:label ;
+ *           text:analyzer [
+ *               a text:LocalizedAnalyzer ;
+ *               text:language "en"
+ *           ]
+ *         ]
+ *    ) .
+ * </pre>
+ */
+public class LocalizedAnalyzerAssembler extends AssemblerBase {
+
+    /**
+     * Builds the analyzer described by {@code root}.
+     *
+     * @param a    the assembler driving the construction (not used here)
+     * @param root the analyzer description; may carry a {@code text:language} literal
+     * @param mode the assembler mode (not used here)
+     * @return the result of {@link LuceneUtil#getLocalizedAnalyzer} for the configured
+     *         language, or a {@link StandardAnalyzer} when no language is configured
+     * @throws TextIndexException if {@code text:language} is present but is not a literal
+     */
+    @Override
+    public Analyzer open(Assembler a, Resource root, Mode mode) {
+        if (!root.hasProperty(TextVocab.pLanguage)) {
+            // No language configured: fall back to the standard analyzer.
+            return new StandardAnalyzer(TextIndexLucene.VER);
+        }
+
+        RDFNode node = root.getProperty(TextVocab.pLanguage).getObject();
+        if (!node.isLiteral()) {
+            throw new TextIndexException("text:language property must be a string : " + node);
+        }
+        // Read the lexical form, as TextIndexLuceneMultilingualAssembler does for
+        // text:directory; toString() on a literal node is a display form and is
+        // not guaranteed to be exactly the language code.
+        String lang = node.asLiteral().getLexicalForm();
+        return LuceneUtil.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index d901bc7..790dac7 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -30,10 +30,12 @@ public class TextAssembler
         Assembler.general.implementWith(TextVocab.entityMap,        new EntityDefinitionAssembler()) ;
         Assembler.general.implementWith(TextVocab.textIndexSolr,    new TextIndexSolrAssembler()) ; 
         Assembler.general.implementWith(TextVocab.textIndexLucene,  new TextIndexLuceneAssembler()) ;
+        Assembler.general.implementWith(TextVocab.textIndexLuceneMultilingual,  new TextIndexLuceneMultilingualAssembler()) ;
         Assembler.general.implementWith(TextVocab.standardAnalyzer, new StandardAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.simpleAnalyzer,   new SimpleAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.keywordAnalyzer,  new KeywordAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ;
+        Assembler.general.implementWith(TextVocab.localizedAnalyzer, new LocalizedAnalyzerAssembler()) ;
     }
 }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
new file mode 100644
index 0000000..a36fcbe
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.atlas.io.IO;
+import org.apache.jena.atlas.lib.IRILib;
+import org.apache.jena.query.text.EntityDefinition;
+import org.apache.jena.query.text.TextDatasetFactory;
+import org.apache.jena.query.text.TextIndex;
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.sparql.util.graph.GraphUtils;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+
+import java.io.File;
+import java.io.IOException;
+
+import static org.apache.jena.query.text.assembler.TextVocab.pDirectory;
+import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap;
+
+public class TextIndexLuceneMultilingualAssembler extends AssemblerBase {
+    /*
+    <#index> a :TextIndexLuceneMultilingual ;
+        #text:directory "mem" ;
+        #text:directory "DIR" ;
+        text:directory <file:DIR> ;
+        text:entityMap <#entMap> ;
+        .
+    */
+
+    /**
+     * Builds a multilingual Lucene text index from its assembler description.
+     *
+     * @param a    the assembler used to open the referenced entity map
+     * @param root the index description; must have exactly one {@code text:directory}
+     * @param mode the assembler mode (not used here)
+     * @return the index created by {@link TextDatasetFactory#createLuceneIndexMultilingual}
+     * @throws TextIndexException if {@code root} does not have exactly one
+     *                            {@code text:directory} property
+     */
+    @SuppressWarnings("resource")
+    @Override
+    public TextIndex open(Assembler a, Resource root, Mode mode) {
+        try {
+            if ( !GraphUtils.exactlyOneProperty(root, pDirectory) )
+                throw new TextIndexException("No 'text:directory' property on " + root) ;
+
+            RDFNode dirNode = root.getProperty(pDirectory).getObject() ;
+            Directory directory = openDirectory(dirNode) ;
+
+            Resource entityMap = GraphUtils.getResourceValue(root, pEntityMap) ;
+            EntityDefinition docDef = (EntityDefinition)a.open(entityMap) ;
+
+            return TextDatasetFactory.createLuceneIndexMultilingual(directory, docDef) ;
+        } catch (IOException e) {
+            // NOTE(review): IO.exception presumably rethrows as an unchecked
+            // exception, leaving the return only to satisfy the compiler - confirm.
+            IO.exception(e) ;
+            return null ;
+        }
+    }
+
+    /**
+     * Resolves the value of {@code text:directory} to a Lucene directory: the
+     * literal "mem" gives an in-memory directory, any other literal is used as a
+     * file path, and a resource has its URI converted to a file name.
+     */
+    private static Directory openDirectory(RDFNode dirNode) throws IOException {
+        if ( !dirNode.isLiteral() ) {
+            String path = IRILib.IRIToFilename(dirNode.asResource().getURI()) ;
+            return FSDirectory.open(new File(path)) ;
+        }
+
+        String lexical = dirNode.asLiteral().getLexicalForm() ;
+        if ( lexical.equals("mem") )
+            return new RAMDirectory() ;
+        return FSDirectory.open(new File(lexical)) ;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index a835a6f..79c223e 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -36,6 +36,8 @@ public class TextVocab
     public static final Resource textIndex          = Vocab.resource(NS, "TextIndex") ;
     public static final Resource textIndexSolr      = Vocab.resource(NS, "TextIndexSolr") ;
     public static final Resource textIndexLucene    = Vocab.resource(NS, "TextIndexLucene") ;
+    public static final Resource textIndexLuceneMultilingual    = Vocab.resource(NS, "TextIndexLuceneMultilingual") ;
+    public static final Property pLanguage          = Vocab.property(NS, "language") ;
     public static final Property pServer            = Vocab.property(NS, "server") ;            // Solr
     public static final Property pDirectory         = Vocab.property(NS, "directory") ;         // Lucene
     public static final Property pQueryAnalyzer     = Vocab.property(NS, "queryAnalyzer") ;
@@ -57,6 +59,7 @@ public class TextVocab
     public static final Resource simpleAnalyzer     = Vocab.resource(NS, "SimpleAnalyzer");
     public static final Resource keywordAnalyzer    = Vocab.resource(NS, "KeywordAnalyzer");
     public static final Resource lowerCaseKeywordAnalyzer    = Vocab.resource(NS, "LowerCaseKeywordAnalyzer");
+    public static final Resource localizedAnalyzer    = Vocab.resource(NS, "LocalizedAnalyzer");
 
 }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 115b493..0219675 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -30,6 +30,7 @@ import org.junit.runners.Suite.SuiteClasses ;
 @SuiteClasses({
     TestBuildTextDataset.class
     , TestDatasetWithLuceneTextIndex.class
+    , TestDatasetWithLuceneMultilingualTextIndex.class
     , TestDatasetWithLuceneGraphTextIndex.class
     
     // Embedded solr not supported 
@@ -45,6 +46,7 @@ import org.junit.runners.Suite.SuiteClasses ;
     , TestDatasetWithKeywordAnalyzer.class
     , TestDatasetWithLowerCaseKeywordAnalyzer.class
     , TestLuceneWithMultipleThreads.class
+    , TestDatasetWithLocalizedAnalyzer.class
 })
 
 public class TS_Text

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java
new file mode 100644
index 0000000..5becf5b
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLocalizedAnalyzer.java
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.Dataset;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Resource;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Test of a dataset with a Lucene text index whose "label" field is configured
+ * with a {@code text:LocalizedAnalyzer} for English.
+ */
+public class TestDatasetWithLocalizedAnalyzer extends AbstractTestDatasetWithTextIndexBase {
+    // NOTE(review): the directory name does not mention this test class - confirm
+    // that it is not shared with another test's index.
+    private static final String INDEX_PATH = "target/test/TestDatasetWithLuceneIndex";
+    private static final File INDEX_DIR = new File(INDEX_PATH);
+
+    private static final String SPEC_BASE = "http://example.org/spec#";
+    private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+    private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+
+    /** Assembler description (Turtle) of the text dataset and its Lucene index. */
+    private static final String SPEC = StrUtils.strjoinNL(
+            "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+            "prefix ja:   <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+            "prefix tdb:  <http://jena.hpl.hp.com/2008/tdb#>",
+            "prefix text: <http://jena.apache.org/text#>",
+            "prefix :     <" + SPEC_BASE + ">",
+            "",
+            "[] ja:loadClass    \"org.apache.jena.query.text.TextQuery\" .",
+            "text:TextDataset      rdfs:subClassOf   ja:RDFDataset .",
+            "text:TextIndexLucene  rdfs:subClassOf   text:TextIndex .",
+
+            ":" + SPEC_ROOT_LOCAL,
+            "    a              text:TextDataset ;",
+            "    text:dataset   :dataset ;",
+            "    text:index     :indexLucene ;",
+            "    .",
+            "",
+            ":dataset",
+            "    a               ja:RDFDataset ;",
+            "    ja:defaultGraph :graph ;",
+            ".",
+            ":graph",
+            "    a               ja:MemoryModel ;",
+            ".",
+            "",
+            ":indexLucene",
+            "    a text:TextIndexLucene ;",
+            "    text:directory <file:" + INDEX_PATH + "> ;",
+            "    text:entityMap :entMap ;",
+            "    .",
+            "",
+            ":entMap",
+            "    a text:EntityMap ;",
+            "    text:entityField      \"uri\" ;",
+            "    text:defaultField     \"label\" ;",
+            "    text:map (",
+            "         [ text:field \"label\" ; ",
+            "           text:predicate rdfs:label ;",
+            "           text:analyzer [ ",
+            "               a text:LocalizedAnalyzer ;",
+            "               text:language \"en\" ",
+            "           ]",
+            "         ]",
+            "         [ text:field \"label\" ; text:predicate rdfs:label ]",
+            "         [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+            "         ) ."
+            );
+
+    /** Parses the assembler description, creates the index directory and opens the dataset. */
+    public void init() {
+        Reader specReader = new StringReader(SPEC);
+        Model specModel = ModelFactory.createDefaultModel();
+        specModel.read(specReader, "", "TURTLE");
+        TextAssembler.init();
+        INDEX_DIR.mkdirs();
+        Resource root = specModel.getResource(SPEC_ROOT_URI);
+        dataset = (Dataset) Assembler.general.open(root);
+    }
+
+    /** Closes the dataset, then removes the index directory if it exists. */
+    public void deleteOldFiles() {
+        dataset.close();
+        if (INDEX_DIR.exists()) {
+            TextSearchUtil.emptyAndDeleteDirectory(INDEX_DIR);
+        }
+    }
+
+    /** Runs before each test (JUnit {@code @Before}) despite the method name. */
+    @Before
+    public void beforeClass() {
+        init();
+    }
+
+    /** Runs after each test (JUnit {@code @After}) despite the method name. */
+    @After
+    public void afterClass() {
+        deleteOldFiles();
+    }
+
+    /**
+     * Indexes the label 'This is my book' and searches for 'books'; the
+     * resource is expected to be found.
+     */
+    @Test
+    public void testLocalizedAnalyzer() {
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testLocalizedAnalyzer>",
+                "  rdfs:label 'This is my book'",
+                "."
+                );
+        // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
+        final String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s",
+                "WHERE {",
+                "    ?s text:query ( rdfs:label 'books' 10 ) .",
+                "}"
+                );
+        Set<String> expectedURIs =
+                new HashSet<>(Arrays.asList("http://example.org/data/resource/testLocalizedAnalyzer"));
+        doTestSearch(turtle, queryString, expectedURIs);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
new file mode 100644
index 0000000..58a78f1
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.*;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.riot.RDFDataMgr;
+import org.apache.jena.riot.Lang;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Tests of a dataset backed by an in-memory multilingual Lucene text index
+ * ({@code text:TextIndexLuceneMultilingual}), queried with the extra
+ * {@code 'lang:xx'} argument of {@code text:query}.
+ */
+public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestDatasetWithTextIndex {
+
+    private static final String SPEC_BASE = "http://example.org/spec#";
+    private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+    private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+
+    static final String DIR = "testing/TextQuery" ;
+
+    /** Assembler description (Turtle) of the text dataset and its multilingual index. */
+    private static final String SPEC = StrUtils.strjoinNL(
+            "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+            "prefix skos: <http://www.w3.org/2004/02/skos/core#> ",
+            "prefix ja:   <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+            "prefix tdb:  <http://jena.hpl.hp.com/2008/tdb#>",
+            "prefix text: <http://jena.apache.org/text#>",
+            "prefix :     <" + SPEC_BASE + ">",
+            "",
+            "[] ja:loadClass    \"org.apache.jena.query.text.TextQuery\" .",
+            "text:TextDataset      rdfs:subClassOf   ja:RDFDataset .",
+            "text:TextIndexLuceneMultilingual  rdfs:subClassOf   text:TextIndex .",
+
+            ":" + SPEC_ROOT_LOCAL,
+            "    a              text:TextDataset ;",
+            "    text:dataset   :dataset ;",
+            "    text:index     :indexLucene ;",
+            "    .",
+            "",
+            ":dataset",
+            "    a               ja:RDFDataset ;",
+            "    ja:defaultGraph :graph ;",
+            ".",
+            ":graph",
+            "    a               ja:MemoryModel ;",
+            ".",
+            "",
+            ":indexLucene",
+            "    a text:TextIndexLuceneMultilingual ;",
+            "    text:directory \"mem\" ;",
+            "    text:entityMap :entMap ;",
+            "    .",
+            "",
+            ":entMap",
+            "    a text:EntityMap ;",
+            "    text:entityField      \"uri\" ;",
+            "    text:defaultField     \"label\" ;",
+            "    text:map (",
+            "         [ text:field \"label\" ; text:predicate rdfs:label ]",
+            "         [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+            "         [ text:field \"prefLabel\" ; text:predicate skos:prefLabel ]",
+            "         ) ."
+            );
+
+    /** Opens a fresh dataset from the assembler description before each test. */
+    @Before
+    public void before() {
+        Reader specReader = new StringReader(SPEC);
+        Model specModel = ModelFactory.createDefaultModel();
+        specModel.read(specReader, "", "TURTLE");
+        TextAssembler.init();
+        Resource root = specModel.getResource(SPEC_ROOT_URI);
+        dataset = (Dataset) Assembler.general.open(root);
+    }
+
+    /** Closes the dataset after each test. */
+    @After
+    public void after() {
+        dataset.close();
+    }
+
+    /** Collects the given URIs into a mutable set. */
+    private static Set<String> uris(String... values) {
+        return new HashSet<>(Arrays.asList(values));
+    }
+
+    /** Turtle data: an English and a German resource whose labels both contain the word 'gift'. */
+    private String giftLabelsTurtle() {
+        return StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testEnglishLocalizedResource>",
+                "  rdfs:label 'He offered me a gift'@en",
+                ".",
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testGermanLocalizedResource>",
+                "  rdfs:label 'Er schluckte gift'@de",
+                "."
+        );
+    }
+
+    /** Query for the word 'gift' in rdfs:label, restricted to the given language code. */
+    private String giftQuery(String lang) {
+        return StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s",
+                "WHERE {",
+                "    ?s text:query ( rdfs:label 'gift' 'lang:" + lang + "' 10 ) .",
+                "}"
+        );
+    }
+
+    /** With no data loaded, a language-restricted query is expected to return nothing. */
+    @Test
+    public void testNoResultsOnFirstCreateIndex(){
+        final String noData = "";
+        final String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s",
+                "WHERE {",
+                "    ?s text:query ( rdfs:label \"book\" \"lang:en\"  10 ) .",
+                "}"
+                );
+        doTestSearch(noData, queryString, new HashSet<String>());
+    }
+
+    /** Searching 'gift' with 'lang:en' is expected to find only the English resource. */
+    @Test
+    public void testRetrievingEnglishLocalizedResource(){
+        Set<String> expectedURIs = uris("http://example.org/data/resource/testEnglishLocalizedResource") ;
+        doTestSearch(giftLabelsTurtle(), giftQuery("en"), expectedURIs);
+    }
+
+    /** Searching 'gift' with 'lang:de' is expected to find only the German resource. */
+    @Test
+    public void testRetrievingGermanLocalizedResource(){
+        Set<String> expectedURIs = uris("http://example.org/data/resource/testGermanLocalizedResource") ;
+        doTestSearch(giftLabelsTurtle(), giftQuery("de"), expectedURIs);
+    }
+
+    /** An English label containing 'engineers' is expected to be found by the query 'engineering'. */
+    @Test
+    public void testEnglishStemming(){
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testEnglishStemming>",
+                "  rdfs:label 'I met some engineers'@en",
+                "."
+        );
+        // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
+        final String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s",
+                "WHERE {",
+                "    ?s text:query ( rdfs:label 'engineering' 'lang:en' 10 ) .",
+                "}"
+        );
+        Set<String> expectedURIs = uris("http://example.org/data/resource/testEnglishStemming") ;
+        doTestSearch(turtle, queryString, expectedURIs);
+    }
+
+    /**
+     * Loads the SKOS fixture and queries skos:prefLabel in French and in German;
+     * one concept is expected from each branch of the UNION.
+     */
+    @Test
+    public void testRetrievingSKOSConcepts() {
+        final String queryString = StrUtils.strjoinNL(
+                "PREFIX text: <http://jena.apache.org/text#>",
+                "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
+                "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>",
+                "SELECT ?s",
+                "WHERE {",
+                "    { ?s text:query ( skos:prefLabel 'frites' 'lang:fr' ) }",
+                "    UNION ",
+                "    { ?s text:query ( skos:prefLabel 'Kartoffelpüree' 'lang:de' ) }" ,
+                "}"
+        );
+        Set<String> expectedURIs = uris("http://example.com/dishes#fries",
+                                        "http://example.com/dishes#mashed") ;
+
+        // Load the fixture inside a write transaction so that the index is updated.
+        dataset.begin(ReadWrite.WRITE);
+        Model model = dataset.getDefaultModel();
+        RDFDataMgr.read(model, DIR + "/data.skos", Lang.RDFXML);
+        dataset.commit();
+
+        doTestQuery(dataset, "", queryString, expectedURIs, expectedURIs.size());
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/9553c6b2/jena-text/testing/TextQuery/data.skos
----------------------------------------------------------------------
diff --git a/jena-text/testing/TextQuery/data.skos b/jena-text/testing/TextQuery/data.skos
new file mode 100644
index 0000000..056b91d
--- /dev/null
+++ b/jena-text/testing/TextQuery/data.skos
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF
+    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+    xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+    xmlns:skos="http://www.w3.org/2004/02/skos/core#"
+    xmlns:dc="http://purl.org/dc/elements/1.1/">
+
+    <skos:ConceptScheme rdf:about="http://example.com/dishes">
+        <dc:title>The Example Taxonomy</dc:title>
+        <dc:description>An example taxonomy to illustrate the use of the SKOS schema.</dc:description>
+    </skos:ConceptScheme>
+
+    <skos:Concept rdf:about="http://example.com/dishes#potatoBased">
+        <skos:prefLabel xml:lang="fr">Plats à base de pomme de terre</skos:prefLabel>
+        <skos:prefLabel xml:lang="en">Potato based dishes</skos:prefLabel>
+        <skos:prefLabel xml:lang="de">Kartoffelgerichte</skos:prefLabel>
+        <skos:inScheme rdf:resource="http://example.com/dishes"/>
+        <skos:topConceptOf rdf:resource="http://example.com/dishes"/>
+    </skos:Concept>
+
+    <skos:Concept rdf:about="http://example.com/dishes#fries">
+        <skos:prefLabel xml:lang="fr">Frites</skos:prefLabel>
+        <skos:prefLabel xml:lang="en">French fries</skos:prefLabel>
+        <skos:prefLabel xml:lang="de">Französisch frites</skos:prefLabel>
+        <skos:inScheme rdf:resource="http://example.com/dishes"/>
+        <skos:broader rdf:resource="http://example.com/dishes#potatoBased"/>
+    </skos:Concept>
+
+    <skos:Concept rdf:about="http://example.com/dishes#mashed">
+        <skos:prefLabel xml:lang="fr">Purée de pomme de terre</skos:prefLabel>
+        <skos:prefLabel xml:lang="en">Mashed potatoes</skos:prefLabel>
+        <skos:prefLabel xml:lang="de">Kartoffelpüree</skos:prefLabel>
+        <skos:inScheme rdf:resource="http://example.com/dishes"/>
+        <skos:broader rdf:resource="http://example.com/dishes#potatoBased"/>
+    </skos:Concept>
+</rdf:RDF>
\ No newline at end of file


[2/6] jena git commit: Remove of multilingual aspects in TextIndexLucene + only usage of ISO 639-1 language codes

Posted by an...@apache.org.
Remove of multilingual aspects in TextIndexLucene
+ only usage of ISO 639-1 language codes


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/7ab59ed6
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/7ab59ed6
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/7ab59ed6

Branch: refs/heads/master
Commit: 7ab59ed6d914496c9a1492376745fe9cee840f67
Parents: 9553c6b
Author: Alexis Miara <al...@hotmail.com>
Authored: Thu May 14 09:17:40 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Thu May 14 09:17:40 2015 -0400

----------------------------------------------------------------------
 .../org/apache/jena/query/text/LuceneUtil.java  | 55 ---------------
 .../org/apache/jena/query/text/TextIndex.java   |  3 -
 .../apache/jena/query/text/TextIndexLucene.java | 74 ++++++++------------
 .../query/text/TextIndexLuceneMultilingual.java | 47 ++++++++++++-
 .../apache/jena/query/text/TextIndexSolr.java   |  5 --
 .../org/apache/jena/query/text/TextQueryPF.java |  8 +--
 6 files changed, 78 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
index 7fafc4c..050b6f3 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
@@ -37,7 +37,6 @@ public class LuceneUtil {
     }
 
     public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
-        lang = getISO2Language(lang);
         if (lang == null)
             return null;
 
@@ -58,60 +57,6 @@ public class LuceneUtil {
         }
     }
 
-    public static String getISO2Language(String lang) {
-        if (lang != null) {
-            lang = lang.split("-")[0].toLowerCase();
-            if (lang.length() == 2)
-                return lang;
-            else {
-                if ("ara".equals(lang)) return "ar";
-                if ("bul".equals(lang)) return "bg";
-                if ("cat".equals(lang)) return "ca";
-                if ("ces".equals(lang)) return "cs";
-                if ("cze".equals(lang)) return "cs";
-                if ("dan".equals(lang)) return "da";
-                if ("deu".equals(lang)) return "de";
-                if ("ger".equals(lang)) return "de";
-                if ("ell".equals(lang)) return "el";
-                if ("gre".equals(lang)) return "el";
-                if ("eng".equals(lang)) return "en";
-                if ("spa".equals(lang)) return "es";
-                if ("eus".equals(lang)) return "eu";
-                if ("baq".equals(lang)) return "eu";
-                if ("fas".equals(lang)) return "fa";
-                if ("per".equals(lang)) return "fa";
-                if ("fin".equals(lang)) return "fi";
-                if ("fra".equals(lang)) return "fr";
-                if ("fre".equals(lang)) return "fr";
-                if ("gle".equals(lang)) return "ga";
-                if ("glg".equals(lang)) return "gl";
-                if ("hin".equals(lang)) return "hi";
-                if ("hun".equals(lang)) return "hu";
-                if ("hye".equals(lang)) return "hy";
-                if ("arm".equals(lang)) return "hy";
-                if ("ind".equals(lang)) return "id";
-                if ("ita".equals(lang)) return "it";
-                if ("jpn".equals(lang)) return "jp";
-                if ("kor".equals(lang)) return "ko";
-                if ("lav".equals(lang)) return "lv";
-                if ("nld".equals(lang)) return "nl";
-                if ("dut".equals(lang)) return "nl";
-                if ("nor".equals(lang)) return "no";
-                if ("por".equals(lang)) return "pt";
-                if ("ron".equals(lang)) return "ro";
-                if ("rum".equals(lang)) return "ro";
-                if ("rus".equals(lang)) return "ru";
-                if ("swe".equals(lang)) return "sv";
-                if ("tha".equals(lang)) return "th";
-                if ("tur".equals(lang)) return "tr";
-                if ("zho".equals(lang)) return "zh";
-                if ("chi".equals(lang)) return "zh";
-            }
-        }
-
-        return null;
-    }
-
     private static void initAnalyzerDefs() {
         analyzersClasses = new Hashtable<>();
         analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);

http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
index ffe92e7..69efb31 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
@@ -51,7 +51,4 @@ public interface TextIndex extends Closeable //, Transactional
     List<Node> query(String qs) ;
 
     EntityDefinition getDocDef() ;
-
-    //localization
-    boolean isMultilingual() ;
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 004c242..abb9466 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -206,42 +206,36 @@ public class TextIndexLucene implements TextIndex {
         if ( log.isDebugEnabled() )
             log.debug("Update entity: " + entity) ;
         try {
-            Document doc = doc(entity);
-            Analyzer analyzer = null;
-            if (isMultilingual())
-                analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
-            Term term = new Term(docDef.getEntityField(), entity.getId());
-
-            if (analyzer != null)
-                indexWriter.updateDocument(term, doc, analyzer) ;
-            else //use the default one
-                indexWriter.updateDocument(term, doc);
+            updateDocument(entity);
         } catch (IOException e) {
             throw new TextIndexException(e) ;
         }
     }
 
+    protected void updateDocument(Entity entity) throws IOException {
+        Document doc = doc(entity);
+        Term term = new Term(docDef.getEntityField(), entity.getId());
+        indexWriter.updateDocument(term, doc);
+    }
+
     @Override
     public void addEntity(Entity entity) {
         if ( log.isDebugEnabled() )
             log.debug("Add entity: " + entity) ;
         try {
-            Document doc = doc(entity) ;
-            Analyzer analyzer = null;
-            if (isMultilingual())
-                analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
-
-            if (analyzer != null)
-                indexWriter.addDocument(doc, analyzer) ;
-            else //use the default one
-                indexWriter.addDocument(doc) ;
+            addDocument(entity);
         }
         catch (IOException e) {
             throw new TextIndexException(e) ;
         }
     }
 
-    private Document doc(Entity entity) {
+    protected void addDocument(Entity entity) throws IOException {
+        Document doc = doc(entity) ;
+        indexWriter.addDocument(doc) ;
+    }
+
+    protected Document doc(Entity entity) {
         Document doc = new Document() ;
         Field entField = new Field(docDef.getEntityField(), entity.getId(), ftIRI) ;
         doc.add(entField) ;
@@ -252,18 +246,18 @@ public class TextIndexLucene implements TextIndex {
             doc.add(gField) ;
         }
 
+        for ( Field field : buildContentFields(entity) )
+            doc.add(field);
+
+        return doc ;
+    }
+
+    protected List<Field> buildContentFields(Entity entity) {
+        List<Field> list = new ArrayList<>();
         for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
-            Field field = new Field(e.getKey(), (String)e.getValue(), ftText) ;
-            doc.add(field) ;
-            if (isMultilingual()) {
-                String lang =  entity.getLanguage();
-                if (lang == null || "".equals(lang))
-                    lang = "undef";
-                field = new Field("lang", lang, StringField.TYPE_STORED ) ;
-                doc.add(field) ;
-            }
+            list.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
         }
-        return doc ;
+        return list;
     }
 
     @Override
@@ -289,10 +283,14 @@ public class TextIndexLucene implements TextIndex {
         return query ;
     }
 
+    protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+        return parseQuery(queryString, primaryField, analyzer);
+    }
+
     private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
         String escaped = QueryParserBase.escape(uri) ;
         String qs = docDef.getEntityField() + ":" + escaped ;
-        Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+        Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
         ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs ;
         List<Map<String, Node>> records = new ArrayList<Map<String, Node>>() ;
@@ -339,14 +337,7 @@ public class TextIndexLucene implements TextIndex {
 
     private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
-        Analyzer qAnalyzer = queryAnalyzer;
-        if (isMultilingual()) {//index and query analyzer must be the same
-            String lang = qs.substring( qs.lastIndexOf(":") + 1);
-            if (!"undef".equals(lang))
-                qAnalyzer = LuceneUtil.getLocalizedAnalyzer(lang);
-        }
-
-        Query query = parseQuery(qs, docDef.getPrimaryField(), qAnalyzer) ;
+        Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
         if ( limit <= 0 )
             limit = MAX_N ;
         ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
@@ -370,11 +361,6 @@ public class TextIndexLucene implements TextIndex {
         return docDef ;
     }
 
-    @Override
-    public boolean isMultilingual() {
-        return false;
-    }
-
     private Node entryToNode(String v) {
         // TEMP
         return NodeFactoryExtra.createLiteralNode(v, null, null) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index 86b34e6..cdf7876 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -18,8 +18,18 @@
 
 package org.apache.jena.query.text;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 
+import java.io.IOException;
+import java.util.List;
+
 public class TextIndexLuceneMultilingual extends TextIndexLucene {
 
     public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
@@ -27,7 +37,40 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
     }
 
     @Override
-    public boolean isMultilingual() {
-        return true;
+    protected void updateDocument(Entity entity) throws IOException {
+        Document doc = doc(entity);
+        Term term = new Term(getDocDef().getEntityField(), entity.getId());
+        Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+        if (analyzer == null)
+            analyzer = getAnalyzer();
+        getIndexWriter().updateDocument(term, doc, analyzer) ;
+    }
+
+    @Override
+    protected void addDocument(Entity entity) throws IOException {
+        Document doc = doc(entity) ;
+        Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+        if (analyzer == null)
+            analyzer = getAnalyzer();
+        getIndexWriter().addDocument(doc, analyzer) ;
+    }
+
+    @Override
+    protected List<Field> buildContentFields(Entity entity) {
+        List<Field> list = super.buildContentFields(entity);
+        String lang =  entity.getLanguage();
+        if (lang == null || "".equals(lang))
+            lang = "undef";
+        list.add( new Field("lang", lang, StringField.TYPE_STORED ) );
+        return list;
+    }
+
+    @Override
+    protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+        String lang = queryString.substring( queryString.lastIndexOf(":") + 1);
+        if (!"undef".equals(lang))
+            analyzer = LuceneUtil.getLocalizedAnalyzer(lang);
+
+        return super.preParseQuery(queryString, primaryField, analyzer);
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
index 5be898c..54a3263 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
@@ -231,11 +231,6 @@ public class TextIndexSolr implements TextIndex
         return docDef ;
     }
 
-    @Override
-    public boolean isMultilingual() {
-        return false;
-    }
-
     private Node entryToNode(String v)
     {
         // TEMP

http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index cc7e4f6..4fac00b 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -74,11 +74,9 @@ public class TextQueryPF extends PropertyFunctionBase {
         if (argObject.isList()) {
             //extract of extra lang arg if present and if is usable (multilingual index).
             //arg is removed from the list to avoid conflict with order and args length
-            if (server.isMultilingual()) {
-                langArg = extractArg("lang", argObject);
-                if (langArg == null)
-                    langArg = "undef";
-            }
+            langArg = extractArg("lang", argObject);
+            if (langArg == null && server instanceof TextIndexLuceneMultilingual)
+                langArg = "undef";
 
             List<Node> list = argObject.getArgList() ;
             if (list.size() == 0)


[3/6] jena git commit: Implement langField to store the language tags of literals + refactor the growing set of TextDatasetFactory methods

Posted by an...@apache.org.
Implement langField to store the language tags of literals
+ refactor the growing set of TextDatasetFactory methods


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1a57c9d3
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1a57c9d3
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1a57c9d3

Branch: refs/heads/master
Commit: 1a57c9d35b9ecf17c7e65c6bf7f19951adc3e44f
Parents: 7ab59ed
Author: Alexis Miara <al...@hotmail.com>
Authored: Tue May 19 14:41:32 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Tue May 19 14:41:32 2015 -0400

----------------------------------------------------------------------
 .../main/java/examples/JenaTextExample1.java    |   6 +-
 .../jena/query/text/EntityDefinition.java       |  70 +++--------
 .../org/apache/jena/query/text/LuceneUtil.java  |  95 --------------
 .../jena/query/text/TextDatasetFactory.java     | 109 ++--------------
 .../apache/jena/query/text/TextIndexConfig.java |  61 +++++++++
 .../apache/jena/query/text/TextIndexLucene.java |  50 +++-----
 .../query/text/TextIndexLuceneMultilingual.java |  41 +++---
 .../org/apache/jena/query/text/TextQueryPF.java |  16 +--
 .../apache/jena/query/text/analyzer/Util.java   |  96 ++++++++++++++
 .../assembler/EntityDefinitionAssembler.java    |   8 +-
 .../assembler/LocalizedAnalyzerAssembler.java   |   4 +-
 .../query/text/assembler/TextAssembler.java     |   1 -
 .../assembler/TextIndexLuceneAssembler.java     |  40 ++++--
 .../TextIndexLuceneMultilingualAssembler.java   |  87 -------------
 .../jena/query/text/assembler/TextVocab.java    |   4 +-
 ...ractTestDatasetWithLuceneGraphTextIndex.java |   6 +-
 .../org/apache/jena/query/text/TS_Text.java     |   1 +
 .../jena/query/text/TestBuildTextDataset.java   |   5 +-
 ...tDatasetWithLuceneMultilingualTextIndex.java |  34 ++++-
 ...DatasetWithLuceneTextIndexWithLangField.java | 126 +++++++++++++++++++
 .../text/TestLuceneWithMultipleThreads.java     |  10 +-
 .../org/apache/jena/query/text/TestTextTDB.java |   5 +-
 22 files changed, 453 insertions(+), 422 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/examples/JenaTextExample1.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/examples/JenaTextExample1.java b/jena-text/src/main/java/examples/JenaTextExample1.java
index 6ad2c26..c273540 100644
--- a/jena-text/src/main/java/examples/JenaTextExample1.java
+++ b/jena-text/src/main/java/examples/JenaTextExample1.java
@@ -23,6 +23,7 @@ import org.apache.jena.atlas.logging.LogCtl ;
 import org.apache.jena.query.* ;
 import org.apache.jena.query.text.EntityDefinition ;
 import org.apache.jena.query.text.TextDatasetFactory ;
+import org.apache.jena.query.text.TextIndexConfig;
 import org.apache.jena.query.text.TextQuery ;
 import org.apache.jena.rdf.model.Model ;
 import org.apache.jena.riot.RDFDataMgr ;
@@ -58,13 +59,14 @@ public class JenaTextExample1
         Dataset ds1 = DatasetFactory.createMem() ; 
 
         // Define the index mapping 
-        EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+        EntityDefinition entDef = new EntityDefinition("uri", "text");
+        entDef.setPrimaryPredicate(RDFS.label.asNode());
 
         // Lucene, in memory.
         Directory dir =  new RAMDirectory();
         
         // Join together into a dataset
-        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
+        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef)) ;
         
         return ds ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
index 2f15ffb..30b048a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
@@ -41,7 +41,8 @@ public class EntityDefinition {
     // Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
     private final String                 entityField ;
     private final String                 primaryField ;
-    private final String                 graphField ;
+    private String                 graphField ;
+    private String                 langField ;
     //private final Node                   primaryPredicate ;
 
     /**
@@ -51,67 +52,22 @@ public class EntityDefinition {
      *            The primary/default field to search
      */
     public EntityDefinition(String entityField, String primaryField) {
-        this(entityField, primaryField, (String)null) ;
-    }
-
-    /**
-     * @param entityField
-     *            The entity being indexed (e.g. it's URI).
-     * @param primaryField
-     *            The primary/default field to search
-     * @param graphField
-     *            The field that stores graph URI, or null
-     */
-    public EntityDefinition(String entityField, String primaryField, String graphField) {
         this.entityField = entityField ;
         this.primaryField = primaryField ;
-        this.graphField = graphField ;
     }
 
-    /**
-     * @param entityField
-     *            The entity being indexed (e.g. it's URI).
-     * @param primaryField
-     *            The primary/default field to search
-     * @param primaryPredicate
-     *            The property associated with the primary/default field
-     */
-    public EntityDefinition(String entityField, String primaryField, Resource primaryPredicate) {
-        this(entityField, primaryField, null, primaryPredicate.asNode()) ;
+    public String getEntityField() {
+        return entityField ;
     }
 
-    /**
-     * @param entityField
-     *            The entity being indexed (e.g. it's URI).
-     * @param primaryField
-     *            The primary/default field to search
-     * @param primaryPredicate
-     *            The property associated with the primary/default field
-     */
-    public EntityDefinition(String entityField, String primaryField, Node primaryPredicate) {
-        this(entityField, primaryField, null, primaryPredicate) ;
+    public void setPrimaryPredicate(Resource primaryPredicate) {
+        setPrimaryPredicate(primaryPredicate.asNode());
     }
 
-    /**
-     * @param entityField
-     *            The entity being indexed (e.g. it's URI).
-     * @param primaryField
-     *            The primary/default field to search
-     * @param graphField
-     *            The field that stores graph URI, or null
-     * @param primaryPredicate
-     *            The property associated with the primary/default field
-     */
-    public EntityDefinition(String entityField, String primaryField, String graphField, Node primaryPredicate) {
-        this(entityField, primaryField, graphField) ;
+    public void setPrimaryPredicate(Node primaryPredicate) {
         set(primaryField, primaryPredicate) ;
     }
 
-
-    public String getEntityField() {
-        return entityField ;
-    }
-
     public void set(String field, Node predicate) {
         predicateToField.put(predicate, field) ;
         // Add uniquely.
@@ -149,6 +105,18 @@ public class EntityDefinition {
         return graphField ;
     }
 
+    public void setGraphField(String graphField) {
+        this.graphField = graphField;
+    }
+
+    public String getLangField() {
+        return langField;
+    }
+
+    public void setLangField(String langField) {
+        this.langField = langField;
+    }
+
     public Collection<String> fields() {
         return fields ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
deleted file mode 100644
index 050b6f3..0000000
--- a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.query.text;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.util.Version;
-import java.lang.reflect.Constructor;
-import java.util.Hashtable;
-
-public class LuceneUtil {
-
-    private static Hashtable<String, Class> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
-    private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
-
-    static {
-        initAnalyzerDefs();
-    }
-
-    public static Analyzer getLocalizedAnalyzer(String lang) {
-        return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
-    }
-
-    public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
-        if (lang == null)
-            return null;
-
-        if (cache.containsKey(lang))
-            return cache.get(lang);
-
-        try {
-            Class<?> className = analyzersClasses.get(lang);
-            if (className == null)
-                return null;
-            Constructor constructor = className.getConstructor(Version.class);
-            Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
-            cache.put(lang, analyzer);
-            return analyzer;
-        } catch (Exception e) {
-            e.printStackTrace();
-            return null;
-        }
-    }
-
-    private static void initAnalyzerDefs() {
-        analyzersClasses = new Hashtable<>();
-        analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
-        analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
-        analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
-        analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
-        analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
-        analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
-        analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
-        analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
-        analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
-        analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
-        analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
-        analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
-        analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
-        analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
-        analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
-        analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
-        analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
-        analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
-        analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
-        analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
-        analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
-        analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
-        analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
-        analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
-        analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
-        analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
-        analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
-        analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
-        analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
-        analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
-        analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
-        analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
-    }
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index dd48bfa..dc6a094 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -24,7 +24,6 @@ import org.apache.jena.query.text.assembler.TextVocab ;
 import org.apache.jena.sparql.core.DatasetGraph ;
 import org.apache.jena.sparql.core.assembler.AssemblerUtils ;
 import org.apache.jena.sparql.util.Context ;
-import org.apache.lucene.analysis.Analyzer ;
 import org.apache.lucene.store.Directory ;
 import org.apache.solr.client.solrj.SolrServer ;
 
@@ -88,80 +87,28 @@ public class TextDatasetFactory
      * Create a Lucene TextIndex
      *
      * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     * @param config The config definition for the index instantiation.
      */
-    public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
+    public static TextIndex createLuceneIndex(Directory directory, TextIndexConfig config)
     {
-        TextIndex index = new TextIndexLucene(directory, def, queryAnalyzer) ;
+        TextIndex index;
+        if (config.isMultilingualSupport())
+            index = new TextIndexLuceneMultilingual(directory, config) ;
+        else
+            index = new TextIndexLucene(directory, config) ;
         return index ;
     }
 
     /**
-     * Create a Lucene TextIndex
-     * 
-     * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
-     */ 
-    public static TextIndex createLuceneIndex(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
-    {
-        TextIndex index = new TextIndexLucene(directory, def, analyzer, queryAnalyzer) ;
-        return index ; 
-    }
-
-    /**
-     * Create a multilingual Lucene TextIndex
-     *
-     * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     */
-    public static TextIndex createLuceneIndexMultilingual(Directory directory, EntityDefinition def)
-    {
-        TextIndex index = new TextIndexLuceneMultilingual(directory, def) ;
-        return index ;
-    }
-
-    /**
-     * Create a text-indexed dataset, using Lucene
-     *
-     * @param base the base Dataset
-     * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
-     */
-    public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
-    {
-        TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
-        return create(base, index, true) ;
-    }
-
-    /**
      * Create a text-indexed dataset, using Lucene
-     * 
-     * @param base the base Dataset
-     * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
-     */ 
-    public static Dataset createLucene(Dataset base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
-    {
-        TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
-        return create(base, index, true) ; 
-    }
-
-    /**
-     * Create a multilingual text-indexed dataset, using Lucene
      *
      * @param base the base Dataset
      * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
+     * @param config The config definition for the index instantiation.
      */
-    public static Dataset createLuceneMultilingual(Dataset base, Directory directory, EntityDefinition def)
+    public static Dataset createLucene(Dataset base, Directory directory, TextIndexConfig config)
     {
-        TextIndex index = createLuceneIndexMultilingual(directory, def) ;
+        TextIndex index = createLuceneIndex(directory, config) ;
         return create(base, index, true) ;
     }
 
@@ -170,44 +117,14 @@ public class TextDatasetFactory
      *
      * @param base the base DatasetGraph
      * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     * @param config The config definition for the index instantiation.
      */
-    public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer queryAnalyzer)
+    public static DatasetGraph createLucene(DatasetGraph base, Directory directory, TextIndexConfig config)
     {
-        TextIndex index = createLuceneIndex(directory, def, queryAnalyzer) ;
+        TextIndex index = createLuceneIndex(directory, config) ;
         return create(base, index, true) ;
     }
 
-    /**
-     * Create a text-indexed dataset, using Lucene
-     * 
-     * @param base the base DatasetGraph
-     * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
-     */ 
-    public static DatasetGraph createLucene(DatasetGraph base, Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer)
-    {
-        TextIndex index = createLuceneIndex(directory, def, analyzer, queryAnalyzer) ;
-        return create(base, index, true) ; 
-    }
-
-    /**
-     * Create a multilingual text-indexed dataset, using Lucene
-     *
-     * @param base the base DatasetGraph
-     * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     */
-    public static DatasetGraph createLuceneMultilingual(DatasetGraph base, Directory directory, EntityDefinition def)
-    {
-        TextIndex index = createLuceneIndexMultilingual(directory, def) ;
-        return create(base, index, true) ;
-    }
-
-
     /** Create a Solr TextIndex */
     public static TextIndex createSolrIndex(SolrServer server, EntityDefinition entMap)
     {

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
new file mode 100644
index 0000000..feeb324
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.lucene.analysis.Analyzer;
+
+public class TextIndexConfig {
+
+    EntityDefinition entDef;
+    Analyzer analyzer;
+    Analyzer queryAnalyzer;
+    boolean multilingualSupport;
+
+    public TextIndexConfig(EntityDefinition entDef) {
+        this.entDef = entDef;
+    }
+
+    public EntityDefinition getEntDef() {
+        return entDef;
+    }
+
+    public Analyzer getAnalyzer() {
+        return analyzer;
+    }
+
+    public void setAnalyzer(Analyzer analyzer) {
+        this.analyzer = analyzer;
+    }
+
+    public Analyzer getQueryAnalyzer() {
+        return queryAnalyzer;
+    }
+
+    public void setQueryAnalyzer(Analyzer queryAnalyzer) {
+        this.queryAnalyzer = queryAnalyzer;
+    }
+
+    public boolean isMultilingualSupport() {
+        return multilingualSupport;
+    }
+
+    public void setMultilingualSupport(boolean multilingualSupport) {
+        this.multilingualSupport = multilingualSupport;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index abb9466..cd9ea2f 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -87,42 +87,29 @@ public class TextIndexLucene implements TextIndex {
      * Constructs a new TextIndexLucene.
      *
      * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
+     * @param config The config definition for the index instantiation.
      */
-    public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer queryAnalyzer) {
-        this(directory, def, null, queryAnalyzer);
-    }
-
-    /**
-     * Constructs a new TextIndexLucene.
-     * 
-     * @param directory The Lucene Directory for the index
-     * @param def The EntityDefinition that defines how entities are stored in the index
-     * @param analyzer The analyzer to be used to index literals. If null, then the standard analyzer will be used.
-     * @param queryAnalyzer The analyzer to be used to find terms in the query text.  If null, then the analyzer defined by the EntityDefinition will be used.
-     */
-    public TextIndexLucene(Directory directory, EntityDefinition def, Analyzer analyzer, Analyzer queryAnalyzer) {
+    public TextIndexLucene(Directory directory, TextIndexConfig config) {
         this.directory = directory ;
-        this.docDef = def ;
+        this.docDef = config.getEntDef() ;
 
         // create the analyzer as a wrapper that uses KeywordAnalyzer for
         // entity and graph fields and StandardAnalyzer for all other
         Map<String, Analyzer> analyzerPerField = new HashMap<>() ;
-        analyzerPerField.put(def.getEntityField(), new KeywordAnalyzer()) ;
-        if ( def.getGraphField() != null )
-            analyzerPerField.put(def.getGraphField(), new KeywordAnalyzer()) ;
+        analyzerPerField.put(docDef.getEntityField(), new KeywordAnalyzer()) ;
+        if ( docDef.getGraphField() != null )
+            analyzerPerField.put(docDef.getGraphField(), new KeywordAnalyzer()) ;
 
-        for (String field : def.fields()) {
-            Analyzer _analyzer = def.getAnalyzer(field);
+        for (String field : docDef.fields()) {
+            Analyzer _analyzer = docDef.getAnalyzer(field);
             if (_analyzer != null) {
                 analyzerPerField.put(field, _analyzer);
             }
         }
 
         this.analyzer = new PerFieldAnalyzerWrapper(
-                (null != analyzer) ? analyzer : new StandardAnalyzer(VER), analyzerPerField) ;
-        this.queryAnalyzer = (null != queryAnalyzer) ? queryAnalyzer : this.analyzer ;
+                (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
+        this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
 
         openIndexWriter();
     }
@@ -246,18 +233,17 @@ public class TextIndexLucene implements TextIndex {
             doc.add(gField) ;
         }
 
-        for ( Field field : buildContentFields(entity) )
-            doc.add(field);
+        String langField = docDef.getLangField() ;
 
-        return doc ;
-    }
-
-    protected List<Field> buildContentFields(Entity entity) {
-        List<Field> list = new ArrayList<>();
         for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
-            list.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
+            doc.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
+            if (langField != null) {
+                String lang = entity.getLanguage();
+                if (lang != null && !"".equals(lang))
+                    doc.add(new Field(docDef.getLangField(), lang, StringField.TYPE_STORED));
+            }
         }
-        return list;
+        return doc ;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index cdf7876..ce20294 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -18,29 +18,37 @@
 
 package org.apache.jena.query.text;
 
+import org.apache.jena.query.text.analyzer.Util;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 
 import java.io.IOException;
-import java.util.List;
 
 public class TextIndexLuceneMultilingual extends TextIndexLucene {
 
-    public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
-        super(directory, def, null) ;
+    /**
+     * Constructs a new TextIndexLuceneMultilingual.
+     *
+     * @param directory The Lucene Directory for the index
+     * @param config The config definition for the index instantiation.
+     */
+    public TextIndexLuceneMultilingual(Directory directory, TextIndexConfig config) {
+        super(directory, config) ;
+
+        //multilingual index cannot work without lang field
+        if (config.getEntDef().getLangField() == null)
+            config.getEntDef().setLangField("lang");
     }
 
     @Override
     protected void updateDocument(Entity entity) throws IOException {
         Document doc = doc(entity);
         Term term = new Term(getDocDef().getEntityField(), entity.getId());
-        Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+        Analyzer analyzer = Util.getLocalizedAnalyzer(entity.getLanguage());
         if (analyzer == null)
             analyzer = getAnalyzer();
         getIndexWriter().updateDocument(term, doc, analyzer) ;
@@ -49,28 +57,19 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
     @Override
     protected void addDocument(Entity entity) throws IOException {
         Document doc = doc(entity) ;
-        Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+        Analyzer analyzer = Util.getLocalizedAnalyzer(entity.getLanguage());
         if (analyzer == null)
             analyzer = getAnalyzer();
         getIndexWriter().addDocument(doc, analyzer) ;
     }
 
     @Override
-    protected List<Field> buildContentFields(Entity entity) {
-        List<Field> list = super.buildContentFields(entity);
-        String lang =  entity.getLanguage();
-        if (lang == null || "".equals(lang))
-            lang = "undef";
-        list.add( new Field("lang", lang, StringField.TYPE_STORED ) );
-        return list;
-    }
-
-    @Override
     protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
-        String lang = queryString.substring( queryString.lastIndexOf(":") + 1);
-        if (!"undef".equals(lang))
-            analyzer = LuceneUtil.getLocalizedAnalyzer(lang);
-
+        if (queryString.contains(getDocDef().getLangField() + ":")) {
+            String lang = queryString.substring(queryString.lastIndexOf(":") + 1);
+            if (!"*".equals(lang))
+                analyzer = Util.getLocalizedAnalyzer(lang);
+        }
         return super.preParseQuery(queryString, primaryField, analyzer);
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index 4fac00b..d568232 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -72,11 +72,9 @@ public class TextQueryPF extends PropertyFunctionBase {
             throw new QueryBuildException("Subject is not a single node: " + argSubject) ;
 
         if (argObject.isList()) {
-            //extract of extra lang arg if present and if is usable (multilingual index).
+            //extract the extra lang arg if it is present and usable.
             //arg is removed from the list to avoid conflict with order and args length
             langArg = extractArg("lang", argObject);
-            if (langArg == null && server instanceof TextIndexLuceneMultilingual)
-                langArg = "undef";
 
             List<Node> list = argObject.getArgList() ;
             if (list.size() == 0)
@@ -210,10 +208,14 @@ public class TextQueryPF extends PropertyFunctionBase {
             }
         }
 
-        //for multilingual index
-        if (langArg != null) {
-            String qs2 = "lang:" + langArg;
-            queryString = "(" + queryString + ") AND " + qs2 ;
+        //for language-based search extension
+        if (server.getDocDef().getLangField() != null) {
+            String field = server.getDocDef().getLangField();
+            if (langArg != null) {
+                String qs2 = !"none".equals(langArg)?
+                        field + ":" + langArg : "-" + field + ":*";
+                queryString = "(" + queryString + ") AND " + qs2;
+            }
         }
 
         Explain.explain(execCxt.getContext(), "Text query: "+queryString) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
new file mode 100644
index 0000000..11dd683
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer;
+
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.Version;
+import java.lang.reflect.Constructor;
+import java.util.Hashtable;
+
+public class Util {
+
+    private static Hashtable<String, Class> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
+    private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
+
+    static {
+        initAnalyzerDefs();
+    }
+
+    public static Analyzer getLocalizedAnalyzer(String lang) {
+        return getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+    }
+
+    public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
+        if (lang == null)
+            return null;
+
+        if (cache.containsKey(lang))
+            return cache.get(lang);
+
+        try {
+            Class<?> className = analyzersClasses.get(lang);
+            if (className == null)
+                return null;
+            Constructor constructor = className.getConstructor(Version.class);
+            Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
+            cache.put(lang, analyzer);
+            return analyzer;
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+    private static void initAnalyzerDefs() {
+        analyzersClasses = new Hashtable<>();
+        analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
+        analyzersClasses.put("bg", org.apache.lucene.analysis.bg.BulgarianAnalyzer.class);
+        analyzersClasses.put("ca", org.apache.lucene.analysis.ca.CatalanAnalyzer.class);
+        analyzersClasses.put("cs", org.apache.lucene.analysis.cz.CzechAnalyzer.class);
+        analyzersClasses.put("da", org.apache.lucene.analysis.da.DanishAnalyzer.class);
+        analyzersClasses.put("de", org.apache.lucene.analysis.de.GermanAnalyzer.class);
+        analyzersClasses.put("el", org.apache.lucene.analysis.el.GreekAnalyzer.class);
+        analyzersClasses.put("en", org.apache.lucene.analysis.en.EnglishAnalyzer.class);
+        analyzersClasses.put("es", org.apache.lucene.analysis.es.SpanishAnalyzer.class);
+        analyzersClasses.put("eu", org.apache.lucene.analysis.eu.BasqueAnalyzer.class);
+        analyzersClasses.put("fa", org.apache.lucene.analysis.fa.PersianAnalyzer.class);
+        analyzersClasses.put("fi", org.apache.lucene.analysis.fi.FinnishAnalyzer.class);
+        analyzersClasses.put("fr", org.apache.lucene.analysis.fr.FrenchAnalyzer.class);
+        analyzersClasses.put("ga", org.apache.lucene.analysis.ga.IrishAnalyzer.class);
+        analyzersClasses.put("gl", org.apache.lucene.analysis.gl.GalicianAnalyzer.class);
+        analyzersClasses.put("hi", org.apache.lucene.analysis.hi.HindiAnalyzer.class);
+        analyzersClasses.put("hu", org.apache.lucene.analysis.hu.HungarianAnalyzer.class);
+        analyzersClasses.put("hy", org.apache.lucene.analysis.hy.ArmenianAnalyzer.class);
+        analyzersClasses.put("id", org.apache.lucene.analysis.id.IndonesianAnalyzer.class);
+        analyzersClasses.put("it", org.apache.lucene.analysis.it.ItalianAnalyzer.class);
+        analyzersClasses.put("ja", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+        analyzersClasses.put("ko", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+        analyzersClasses.put("lv", org.apache.lucene.analysis.lv.LatvianAnalyzer.class);
+        analyzersClasses.put("nl", org.apache.lucene.analysis.nl.DutchAnalyzer.class);
+        analyzersClasses.put("no", org.apache.lucene.analysis.no.NorwegianAnalyzer.class);
+        analyzersClasses.put("pt", org.apache.lucene.analysis.pt.PortugueseAnalyzer.class);
+        analyzersClasses.put("ro", org.apache.lucene.analysis.ro.RomanianAnalyzer.class);
+        analyzersClasses.put("ru", org.apache.lucene.analysis.ru.RussianAnalyzer.class);
+        analyzersClasses.put("sv", org.apache.lucene.analysis.sv.SwedishAnalyzer.class);
+        analyzersClasses.put("th", org.apache.lucene.analysis.th.ThaiAnalyzer.class);
+        analyzersClasses.put("tr", org.apache.lucene.analysis.tr.TurkishAnalyzer.class);
+        analyzersClasses.put("zh", org.apache.lucene.analysis.cjk.CJKAnalyzer.class);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
index ca66f27..7604822 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
@@ -66,6 +66,9 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
                                         "  OPTIONAL {" ,
                                         "    ?eMap :graphField ?graphField" ,
                                         "  }",
+                                        "  OPTIONAL {" ,
+                                        "    ?eMap :langField ?langField" ,
+                                        "  }",
             "}") ;
         ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1) ;
         pss.setIri("eMap", root.getURI()) ;
@@ -87,6 +90,7 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
         QuerySolution qsol1 = results.get(0) ;
         String entityField = qsol1.getLiteral("entityField").getLexicalForm() ;
         String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
+        String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
         String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null ;
 
         Multimap<String, Node> mapDefs = HashMultimap.create() ; 
@@ -155,7 +159,9 @@ public class EntityDefinitionAssembler extends AssemblerBase implements Assemble
                 throw new TextIndexException("No definition of primary field '"+defaultField+"'") ;
         }
 
-        EntityDefinition docDef = new EntityDefinition(entityField, defaultField, graphField) ;
+        EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
+        docDef.setGraphField(graphField);
+        docDef.setLangField(langField);
         for ( String f : mapDefs.keys() ) {
             for ( Node p : mapDefs.get(f)) 
                 docDef.set(f, p) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
index 1e37c15..b9d83de 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/LocalizedAnalyzerAssembler.java
@@ -21,9 +21,9 @@ package org.apache.jena.query.text.assembler;
 import org.apache.jena.assembler.Assembler;
 import org.apache.jena.assembler.Mode;
 import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.query.text.LuceneUtil;
 import org.apache.jena.query.text.TextIndexException;
 import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.query.text.analyzer.Util;
 import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.lucene.analysis.Analyzer;
@@ -52,7 +52,7 @@ public class LocalizedAnalyzerAssembler extends AssemblerBase {
                 throw new TextIndexException("text:language property must be a string : " + node);
             }
             String lang = node.toString();
-            return LuceneUtil.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
+            return Util.getLocalizedAnalyzer(lang, TextIndexLucene.VER);
         } else {
             return new StandardAnalyzer(TextIndexLucene.VER);
         }

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index 790dac7..021c003 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -30,7 +30,6 @@ public class TextAssembler
         Assembler.general.implementWith(TextVocab.entityMap,        new EntityDefinitionAssembler()) ;
         Assembler.general.implementWith(TextVocab.textIndexSolr,    new TextIndexSolrAssembler()) ; 
         Assembler.general.implementWith(TextVocab.textIndexLucene,  new TextIndexLuceneAssembler()) ;
-        Assembler.general.implementWith(TextVocab.textIndexLuceneMultilingual,  new TextIndexLuceneMultilingualAssembler()) ;
         Assembler.general.implementWith(TextVocab.standardAnalyzer, new StandardAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.simpleAnalyzer,   new SimpleAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.keywordAnalyzer,  new KeywordAnalyzerAssembler()) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 361841c..abc6c97 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -18,10 +18,6 @@
 
 package org.apache.jena.query.text.assembler ;
 
-import static org.apache.jena.query.text.assembler.TextVocab.pDirectory ;
-import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap ;
-import static org.apache.jena.query.text.assembler.TextVocab.pQueryAnalyzer ;
-
 import java.io.File ;
 import java.io.IOException ;
 
@@ -30,10 +26,7 @@ import org.apache.jena.assembler.Mode ;
 import org.apache.jena.assembler.assemblers.AssemblerBase ;
 import org.apache.jena.atlas.io.IO ;
 import org.apache.jena.atlas.lib.IRILib ;
-import org.apache.jena.query.text.EntityDefinition ;
-import org.apache.jena.query.text.TextDatasetFactory ;
-import org.apache.jena.query.text.TextIndex ;
-import org.apache.jena.query.text.TextIndexException ;
+import org.apache.jena.query.text.*;
 import org.apache.jena.rdf.model.RDFNode ;
 import org.apache.jena.rdf.model.Resource ;
 import org.apache.jena.rdf.model.Statement ;
@@ -43,6 +36,8 @@ import org.apache.lucene.store.Directory ;
 import org.apache.lucene.store.FSDirectory ;
 import org.apache.lucene.store.RAMDirectory ;
 
+import static org.apache.jena.query.text.assembler.TextVocab.*;
+
 public class TextIndexLuceneAssembler extends AssemblerBase {
     /*
     <#index> a :TextIndexLucene ;
@@ -77,7 +72,18 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
                 File dir = new File(path) ;
                 directory = FSDirectory.open(dir) ;
             }
-            
+
+            Analyzer analyzer = null;
+            Statement analyzerStatement = root.getProperty(pAnalyzer);
+            if (null != analyzerStatement) {
+                RDFNode aNode = analyzerStatement.getObject();
+                if (! aNode.isResource()) {
+                    throw new TextIndexException("Text analyzer property is not a resource : " + aNode);
+                }
+                Resource analyzerResource = (Resource) aNode;
+                analyzer = (Analyzer) a.open(analyzerResource);
+            }
+
             Analyzer queryAnalyzer = null;
             Statement queryAnalyzerStatement = root.getProperty(pQueryAnalyzer);
             if (null != queryAnalyzerStatement) {
@@ -89,10 +95,24 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
                 queryAnalyzer = (Analyzer) a.open(analyzerResource);
             }
 
+            boolean isMultilingualSupport = false;
+            Statement mlSupportStatement = root.getProperty(pMultilingualSupport);
+            if (null != mlSupportStatement) {
+                RDFNode mlsNode = mlSupportStatement.getObject();
+                if (! mlsNode.isLiteral()) {
+                    throw new TextIndexException("text:multilingualSupport property must be a string : " + mlsNode);
+                }
+                isMultilingualSupport = mlsNode.asLiteral().getBoolean();
+            }
+
             Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
             EntityDefinition docDef = (EntityDefinition)a.open(r) ;
+            TextIndexConfig config = new TextIndexConfig(docDef);
+            config.setAnalyzer(analyzer);
+            config.setQueryAnalyzer(queryAnalyzer);
+            config.setMultilingualSupport(isMultilingualSupport);
 
-            return TextDatasetFactory.createLuceneIndex(directory, docDef, queryAnalyzer) ;
+            return TextDatasetFactory.createLuceneIndex(directory, config) ;
         } catch (IOException e) {
             IO.exception(e) ;
             return null ;

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
deleted file mode 100644
index a36fcbe..0000000
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneMultilingualAssembler.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.jena.query.text.assembler;
-
-import org.apache.jena.assembler.Assembler;
-import org.apache.jena.assembler.Mode;
-import org.apache.jena.assembler.assemblers.AssemblerBase;
-import org.apache.jena.atlas.io.IO;
-import org.apache.jena.atlas.lib.IRILib;
-import org.apache.jena.query.text.EntityDefinition;
-import org.apache.jena.query.text.TextDatasetFactory;
-import org.apache.jena.query.text.TextIndex;
-import org.apache.jena.query.text.TextIndexException;
-import org.apache.jena.rdf.model.RDFNode;
-import org.apache.jena.rdf.model.Resource;
-import org.apache.jena.sparql.util.graph.GraphUtils;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.RAMDirectory;
-
-import java.io.File;
-import java.io.IOException;
-
-import static org.apache.jena.query.text.assembler.TextVocab.pDirectory;
-import static org.apache.jena.query.text.assembler.TextVocab.pEntityMap;
-
-public class TextIndexLuceneMultilingualAssembler extends AssemblerBase {
-    /*
-    <#index> a :TextIndexLuceneMultilingual ;
-        #text:directory "mem" ;
-        #text:directory "DIR" ;
-        text:directory <file:DIR> ;
-        text:entityMap <#endMap> ;
-        .
-    */
-    
-    @SuppressWarnings("resource")
-    @Override
-    public TextIndex open(Assembler a, Resource root, Mode mode) {
-        try {
-            if ( !GraphUtils.exactlyOneProperty(root, pDirectory) )
-                throw new TextIndexException("No 'text:directory' property on " + root) ;
-
-            Directory directory ;
-
-            RDFNode n = root.getProperty(pDirectory).getObject() ;
-            if ( n.isLiteral() ) {
-                String literalValue = n.asLiteral().getLexicalForm() ;
-                if (literalValue.equals("mem")) {
-                    directory = new RAMDirectory() ;
-                } else {
-                    File dir = new File(literalValue) ;
-                    directory = FSDirectory.open(dir) ;
-                }
-            } else {
-                Resource x = n.asResource() ;
-                String path = IRILib.IRIToFilename(x.getURI()) ;
-                File dir = new File(path) ;
-                directory = FSDirectory.open(dir) ;
-            }
-
-            Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
-            EntityDefinition docDef = (EntityDefinition)a.open(r) ;
-
-            return TextDatasetFactory.createLuceneIndexMultilingual(directory, docDef) ;
-        } catch (IOException e) {
-            IO.exception(e) ;
-            return null ;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 79c223e..802990d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -36,10 +36,10 @@ public class TextVocab
     public static final Resource textIndex          = Vocab.resource(NS, "TextIndex") ;
     public static final Resource textIndexSolr      = Vocab.resource(NS, "TextIndexSolr") ;
     public static final Resource textIndexLucene    = Vocab.resource(NS, "TextIndexLucene") ;
-    public static final Resource textIndexLuceneMultilingual    = Vocab.resource(NS, "TextIndexLuceneMultilingual") ;
     public static final Property pLanguage          = Vocab.property(NS, "language") ;
     public static final Property pServer            = Vocab.property(NS, "server") ;            // Solr
     public static final Property pDirectory         = Vocab.property(NS, "directory") ;         // Lucene
+    public static final Property pMultilingualSupport   = Vocab.property(NS, "multilingualSupport") ;
     public static final Property pQueryAnalyzer     = Vocab.property(NS, "queryAnalyzer") ;
     public static final Property pEntityMap         = Vocab.property(NS, "entityMap") ;
     
@@ -47,6 +47,8 @@ public class TextVocab
     public static final Resource entityMap          = Vocab.resource(NS, "EntityMap") ;
     public static final Property pEntityField       = Vocab.property(NS, "entityField") ;
     public static final Property pDefaultField      = Vocab.property(NS, "defaultField") ;
+    public static final Property pGraphField        = Vocab.property(NS, "graphField") ;
+    public static final Property pLangField         = Vocab.property(NS, "langField") ;
     public static final Property pMap               = Vocab.property(NS, "map") ;
     public static final Property pField             = Vocab.property(NS, "field") ;
     public static final Property pPredicate         = Vocab.property(NS, "predicate") ;

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
index 56a81b6..1670f63 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
@@ -36,9 +36,11 @@ public class AbstractTestDatasetWithLuceneGraphTextIndex extends AbstractTestDat
     public void init() {
         Dataset ds1 = TDBFactory.createDataset() ;
         Directory dir = new RAMDirectory() ;
-        EntityDefinition eDef = new EntityDefinition("iri", "text", "graph", RDFS.label.asNode()) ;
+        EntityDefinition eDef = new EntityDefinition("iri", "text");
+        eDef.setGraphField("graph");
+        eDef.setPrimaryPredicate(RDFS.label.asNode());
         eDef.set("comment", RDFS.comment.asNode()) ; // some tests require indexing rdfs:comment
-        TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
+        TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
         dataset = TextDatasetFactory.create(ds1, tidx) ;
     }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 0219675..6d1cb25 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -31,6 +31,7 @@ import org.junit.runners.Suite.SuiteClasses ;
     TestBuildTextDataset.class
     , TestDatasetWithLuceneTextIndex.class
     , TestDatasetWithLuceneMultilingualTextIndex.class
+    , TestDatasetWithLuceneTextIndexWithLangField.class
     , TestDatasetWithLuceneGraphTextIndex.class
     
     // Embedded solr not supported 

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
index 2c3564d..02d02f9 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
@@ -110,13 +110,14 @@ public class TestBuildTextDataset extends BaseTest
         Dataset ds1 = DatasetFactory.createMem() ;
 
         // Define the index mapping
-        EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ;
+        EntityDefinition entDef = new EntityDefinition("uri", "text");
+        entDef.setPrimaryPredicate(RDFS.label.asNode());
 
         // Lucene, in memory.
         Directory dir = new RAMDirectory() ;
 
         // Join together into a dataset
-        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, entDef, null) ;
+        Dataset ds = TextDatasetFactory.createLucene(ds1, dir, new TextIndexConfig(entDef)) ;
 
         return ds ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
index 58a78f1..53e2426 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneMultilingualTextIndex.java
@@ -57,7 +57,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
                     "",
                     "[] ja:loadClass    \"org.apache.jena.query.text.TextQuery\" .",
                     "text:TextDataset      rdfs:subClassOf   ja:RDFDataset .",
-                    "text:TextIndexLuceneMultilingual  rdfs:subClassOf   text:TextIndex .",
+                    "text:TextIndexLucene  rdfs:subClassOf   text:TextIndex .",
 
                     ":" + SPEC_ROOT_LOCAL,
                     "    a              text:TextDataset ;",
@@ -74,8 +74,9 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
                     ".",
                     "",
                     ":indexLucene",
-                    "    a text:TextIndexLuceneMultilingual ;",
+                    "    a text:TextIndexLucene ;",
                     "    text:directory \"mem\" ;",
+                    "    text:multilingualSupport true ;",
                     "    text:entityMap :entMap ;",
                     "    .",
                     "",
@@ -83,6 +84,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
                     "    a text:EntityMap ;",
                     "    text:entityField      \"uri\" ;",
                     "    text:defaultField     \"label\" ;",
+                    "    text:langField        \"lang\" ;",
                     "    text:map (",
                     "         [ text:field \"label\" ; text:predicate rdfs:label ]",
                     "         [ text:field \"comment\" ; text:predicate rdfs:comment ]",
@@ -113,7 +115,7 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
                 QUERY_PROLOG,
                 "SELECT ?s",
                 "WHERE {",
-                "    ?s text:query ( rdfs:label \"book\" \"lang:en\"  10 ) .",
+                "    ?s text:query ( rdfs:label 'book' 'lang:en'  10 ) .",
                 "}"
                 );
         doTestSearch(turtle, queryString, new HashSet<String>());
@@ -131,7 +133,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
                 "  rdfs:label 'Er schluckte gift'@de",
                 "."
         );
-        // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
         String queryString = StrUtils.strjoinNL(
                 QUERY_PROLOG,
                 "SELECT ?s",
@@ -156,7 +157,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
                 "  rdfs:label 'Er schluckte gift'@de",
                 "."
         );
-        // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
         String queryString = StrUtils.strjoinNL(
                 QUERY_PROLOG,
                 "SELECT ?s",
@@ -177,7 +177,6 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
                 "  rdfs:label 'I met some engineers'@en",
                 "."
         );
-        // the localized analyzer should use localized lucene index facilities (stop words, stemming...)
         String queryString = StrUtils.strjoinNL(
                 QUERY_PROLOG,
                 "SELECT ?s",
@@ -191,6 +190,29 @@ public class TestDatasetWithLuceneMultilingualTextIndex extends AbstractTestData
     }
 
     @Test
+    public void testRetrievingUnlocalizedResource(){
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "testLocalizedResource>",
+                "  rdfs:label 'A localized text'@en",
+                ".",
+                "<" + RESOURCE_BASE + "testUnlocalizedResource>",
+                "  rdfs:label 'An unlocalized text'",
+                "."
+        );
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s",
+                "WHERE {",
+                "    ?s text:query ( rdfs:label 'text' 'lang:none' 10 ) .",
+                "}"
+        );
+        Set<String> expectedURIs = new HashSet<>() ;
+        expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testUnlocalizedResource")) ;
+        doTestSearch(turtle, queryString, expectedURIs);
+    }
+
+    @Test
     public void testRetrievingSKOSConcepts() {
         String queryString = StrUtils.strjoinNL(
                 "PREFIX text: <http://jena.apache.org/text#>",

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
new file mode 100644
index 0000000..9d99a29
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndexWithLangField.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.Dataset;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.Resource;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+public class TestDatasetWithLuceneTextIndexWithLangField extends AbstractTestDatasetWithTextIndex {
+    
+    private static final String SPEC_BASE = "http://example.org/spec#";
+    private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+    private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+    private static final String SPEC;
+    static {
+        SPEC = StrUtils.strjoinNL(
+                    "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+                    "prefix ja:   <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+                    "prefix tdb:  <http://jena.hpl.hp.com/2008/tdb#>",
+                    "prefix text: <http://jena.apache.org/text#>",
+                    "prefix :     <" + SPEC_BASE + ">",
+                    "",
+                    "[] ja:loadClass    \"org.apache.jena.query.text.TextQuery\" .",
+                    "text:TextDataset      rdfs:subClassOf   ja:RDFDataset .",
+                    "text:TextIndexLucene  rdfs:subClassOf   text:TextIndex .",
+                    
+                    ":" + SPEC_ROOT_LOCAL,
+                    "    a              text:TextDataset ;",
+                    "    text:dataset   :dataset ;",
+                    "    text:index     :indexLucene ;",
+                    "    .",
+                    "",
+                    ":dataset",
+                    "    a               ja:RDFDataset ;",
+                    "    ja:defaultGraph :graph ;",
+                    ".",
+                    ":graph",
+                    "    a               ja:MemoryModel ;",
+                    ".",
+                    "",
+                    ":indexLucene",
+                    "    a text:TextIndexLucene ;",
+                    "    text:directory \"mem\" ;",
+                    "    text:entityMap :entMap ;",
+                    "    .",
+                    "",
+                    ":entMap",
+                    "    a text:EntityMap ;",
+                    "    text:entityField      \"uri\" ;",
+                    "    text:defaultField     \"label\" ;",
+                    "    text:langField        \"language\" ;",
+                    "    text:map (",
+                    "         [ text:field \"label\" ; text:predicate rdfs:label ]",
+                    "         [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+                    "         ) ."
+                    );
+    }
+    
+    @Before
+    public void before() {
+        Reader reader = new StringReader(SPEC);
+        Model specModel = ModelFactory.createDefaultModel();
+        specModel.read(reader, "", "TURTLE");
+        TextAssembler.init();           
+        Resource root = specModel.getResource(SPEC_ROOT_URI);
+        dataset = (Dataset) Assembler.general.open(root);
+    }
+    
+    @After
+    public void after() {
+        dataset.close();
+    }
+    
+    @Test
+    public void testLiteralLanguageSearch(){
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "ParisInEnglish>",
+                "  rdfs:label 'Paris, capital of France'@en",
+                ".",
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + "ParisInFrench>",
+                "  rdfs:label 'Paris, capitale de la France'@fr",
+                "."
+        );
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s",
+                "WHERE {",
+                "    ?s text:query ( rdfs:label 'paris' 'lang:en' 10 ) .",
+                "}"
+        );
+        Set<String> expectedURIs = new HashSet<>() ;
+        expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/ParisInEnglish")) ;
+        doTestSearch(turtle, queryString, expectedURIs);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
index dc02671..6e743a2 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
@@ -48,7 +48,9 @@ public class TestLuceneWithMultipleThreads
     private static final EntityDefinition entDef;
     
     static {
-        entDef = new EntityDefinition("uri", "label", "graph", RDFS.label.asNode());
+        entDef = new EntityDefinition("uri", "label");
+        entDef.setGraphField("graph");
+        entDef.setPrimaryPredicate(RDFS.label.asNode());
         StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
         entDef.setAnalyzer("label", analyzer);
     }
@@ -56,7 +58,7 @@ public class TestLuceneWithMultipleThreads
     @Test
     public void testReadInMiddleOfWrite() throws InterruptedException, ExecutionException
     {
-        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
+        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), new TextIndexConfig(entDef));
         final Dataset ds = DatasetFactory.create(dsg);
         final ExecutorService execService = Executors.newSingleThreadExecutor();
         final Future<?> f = execService.submit(new Runnable()
@@ -112,7 +114,7 @@ public class TestLuceneWithMultipleThreads
     @Test
     public void testWriteInMiddleOfRead() throws InterruptedException, ExecutionException
     {
-        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), entDef, null);
+        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(new GraphStoreNullTransactional(), new RAMDirectory(), new TextIndexConfig(entDef));
         final int numReads = 10;
         final Dataset ds = DatasetFactory.create(dsg);
         final ExecutorService execService = Executors.newFixedThreadPool(10);
@@ -180,7 +182,7 @@ public class TestLuceneWithMultipleThreads
     @Test
     public void testIsolation() throws InterruptedException, ExecutionException {
         
-        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), entDef, null);
+        final DatasetGraphText dsg = (DatasetGraphText)TextDatasetFactory.createLucene(DatasetGraphFactory.createMem(), new RAMDirectory(), new TextIndexConfig(entDef));
         
         final int numReaders = 2;
         final List<Future<?>> futures = new ArrayList<Future<?>>(numReaders);

http://git-wip-us.apache.org/repos/asf/jena/blob/1a57c9d3/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
index f3307f0..fa8a08a 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestTextTDB.java
@@ -38,8 +38,9 @@ public class TestTextTDB extends BaseTest
     private static Dataset create() {
         Dataset ds1 = TDBFactory.createDataset() ;
         Directory dir = new RAMDirectory() ;
-        EntityDefinition eDef = new EntityDefinition("iri", "text", RDFS.label) ;
-        TextIndex tidx = new TextIndexLucene(dir, eDef, null) ;
+        EntityDefinition eDef = new EntityDefinition("iri", "text");
+        eDef.setPrimaryPredicate(RDFS.label);
+        TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
         Dataset ds = TextDatasetFactory.create(ds1, tidx) ;
         return ds ;
     }


[4/6] jena git commit: Merge branch 'upstream/master' into jena-text-ml-single-index

Posted by an...@apache.org.
Merge branch 'upstream/master' into jena-text-ml-single-index


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/ed717028
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/ed717028
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/ed717028

Branch: refs/heads/master
Commit: ed7170283975fbed9582caa6347016432e046618
Parents: 1a57c9d 8d5f1cb
Author: Alexis Miara <al...@hotmail.com>
Authored: Tue May 19 14:47:19 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Tue May 19 14:47:19 2015 -0400

----------------------------------------------------------------------
 .../update/UpdateExecuteOperations.java         |  17 +-
 .../arq/examples/update/UpdateProgrammatic.java |   4 +-
 .../arq/examples/update/UpdateReadFromFile.java |   4 +-
 .../src/main/java/arq/cmdline/CmdUpdate.java    |  19 +-
 .../src/main/java/arq/cmdline/ModDataset.java   |   7 +-
 .../main/java/arq/cmdline/ModGraphStore.java    |  95 ---
 jena-arq/src/main/java/arq/load.java            |   4 +-
 jena-arq/src/main/java/arq/update.java          |  69 +-
 .../org/apache/jena/atlas/json/JsonArray.java   |   2 +-
 .../atlas/json/io/parser/TokenizerJSON.java     | 697 +++++++++----------
 .../java/org/apache/jena/query/Dataset.java     |   8 +-
 .../main/java/org/apache/jena/query/Query.java  |  17 +
 .../apache/jena/riot/out/NodeFormatterBase.java |   2 +-
 .../apache/jena/riot/thrift/ThriftConvert.java  |   8 +
 .../apache/jena/sparql/core/VarExprList.java    |  12 +-
 .../sparql/core/assembler/AssemblerUtils.java   |   1 -
 .../core/assembler/GraphStoreAssembler.java     |  47 --
 .../apache/jena/sparql/engine/http/Service.java |   1 -
 .../apache/jena/sparql/lang/arq/ARQParser.java  |   2 +-
 .../jena/sparql/modify/GraphStoreBasic.java     |  19 -
 .../jena/sparql/modify/GraphStoreNull.java      |  20 +-
 .../jena/sparql/modify/GraphStoreWrapper.java   |  13 -
 .../jena/sparql/modify/UpdateEngineBase.java    |   9 +-
 .../jena/sparql/modify/UpdateEngineFactory.java |   6 +-
 .../jena/sparql/modify/UpdateEngineMain.java    |  31 +-
 .../sparql/modify/UpdateEngineNonStreaming.java |  32 +-
 .../sparql/modify/UpdateEngineRegistry.java     |   6 +-
 .../jena/sparql/modify/UpdateEngineWorker.java  | 117 ++--
 .../jena/sparql/modify/UpdateProcessRemote.java |   3 +-
 .../sparql/modify/UpdateProcessRemoteBase.java  |  22 +-
 .../sparql/modify/UpdateProcessRemoteForm.java  |   3 +-
 .../jena/sparql/modify/UpdateProcessorBase.java |  23 +-
 .../modify/UpdateProcessorStreamingBase.java    |  42 +-
 .../java/org/apache/jena/update/GraphStore.java |  15 +-
 .../apache/jena/update/GraphStoreFactory.java   |   7 +
 .../org/apache/jena/update/UpdateAction.java    | 172 +++--
 .../jena/update/UpdateExecutionFactory.java     | 193 +++--
 .../org/apache/jena/update/UpdateProcessor.java |  13 +-
 .../jena/update/UpdateProcessorStreaming.java   |   9 +-
 .../jena/web/DatasetGraphAccessorHTTP.java      |   3 +-
 jena-arq/src/main/java/riotcmd/utf8.java        |  48 +-
 .../query/TestParameterizedSparqlString.java    |   3 +-
 .../jena/riot/lang/TestPipedRDFIterators.java   |   6 +-
 .../jena/riot/system/TestIO_JenaReaders.java    |   4 +-
 .../jena/riot/system/TestIO_JenaWriters.java    |   4 +-
 .../apache/jena/riot/thrift/TS_RDFThrift.java   |   1 -
 .../apache/jena/riot/thrift/TestThriftTerm.java |  48 +-
 .../sparql/core/AbstractTestDynamicDataset.java |   5 +-
 .../sparql/core/TestDatasetGraphWithLock.java   |   4 +-
 .../jena/sparql/core/TestDynamicDatasetMem.java |   8 +-
 .../sparql/engine/index/TestIndexTable.java     |   4 +-
 .../apache/jena/sparql/graph/GraphsTests.java   |  33 +-
 .../apache/jena/sparql/graph/TestDatasets.java  |  20 +-
 .../apache/jena/sparql/junit/EarlTestCase.java  |   8 +-
 .../org/apache/jena/sparql/junit/QueryTest.java |  10 +-
 .../jena/sparql/junit/SurpressedTest.java       |   2 +-
 .../apache/jena/sparql/junit/SyntaxTest.java    |   2 +-
 .../jena/sparql/junit/SyntaxUpdateTest.java     |   2 +-
 .../jena/sparql/junit/TestSerialization.java    |   2 +-
 .../apache/jena/sparql/junit/UpdateTest.java    |   6 +-
 .../sparql/modify/AbstractTestUpdateBase.java   |  50 +-
 .../sparql/modify/AbstractTestUpdateGraph.java  |  49 +-
 .../modify/AbstractTestUpdateGraphMgt.java      |  10 +-
 .../jena/sparql/modify/TestUpdateGraphMem.java  |   9 +-
 .../sparql/modify/TestUpdateGraphMgtMem.java    |  15 +-
 .../sparql/modify/TestUpdateOperations.java     |  17 +-
 .../apache/jena/sparql/util/TestFmtUtils.java   |   6 +-
 .../apache/jena/atlas/io/CharStreamReader.java  |  23 +-
 .../main/java/org/apache/jena/atlas/io/IO.java  |  13 +-
 .../org/apache/jena/atlas/io/InStreamASCII.java |   4 +-
 .../org/apache/jena/atlas/io/InStreamUTF8.java  |   5 +-
 .../apache/jena/atlas/io/IndentedWriter.java    |   7 +-
 .../apache/jena/atlas/io/PeekInputStream.java   |  98 ++-
 .../org/apache/jena/atlas/io/PeekReader.java    |   3 +-
 .../org/apache/jena/atlas/iterator/Iter.java    | 152 +---
 .../java/org/apache/jena/atlas/lib/Chars.java   |   2 -
 .../java/org/apache/jena/atlas/lib/IRILib.java  |   1 -
 .../jena/atlas/lib/TestDateTimeUtils.java       |  10 +-
 .../apache/jena/datatypes/xsd/XSDDatatype.java  |   2 +-
 .../org/apache/jena/n3/N3IndentedWriter.java    |   1 -
 .../org/apache/jena/n3/N3JenaWriterCommon.java  |  17 +-
 .../java/org/apache/jena/n3/N3JenaWriterPP.java |  10 +-
 .../jena/n3/turtle/parser/TurtleParser.java     |   7 +-
 .../jena/ontology/impl/OWLLiteProfile.java      |   2 -
 .../rdfxml/xmlinput/impl/AbsXMLContext.java     |   7 -
 .../jena/reasoner/rulesys/FBRuleInfGraph.java   |   4 +-
 .../jena/reasoner/rulesys/builtins/Bound.java   |   1 -
 .../jena/reasoner/rulesys/builtins/Drop.java    |   1 -
 .../jena/reasoner/rulesys/builtins/Remove.java  |   1 -
 .../jena/reasoner/rulesys/builtins/Unbound.java |   1 -
 .../jena/reasoner/rulesys/impl/Generator.java   |   1 -
 .../jena/reasoner/rulesys/impl/LPRuleStore.java |   1 -
 .../reasoner/rulesys/impl/RuleClauseCode.java   |   4 +-
 .../jena/shared/impl/PrefixMappingImpl.java     |   2 +-
 .../jena/ontology/impl/TestAllDifferent.java    |   2 +-
 .../jena/ontology/impl/TestClassExpression.java | 112 +--
 .../jena/ontology/impl/TestIndividual.java      |  42 +-
 .../apache/jena/ontology/impl/TestOntTools.java |   3 +-
 .../apache/jena/ontology/impl/TestOntology.java |   8 +-
 .../apache/jena/ontology/impl/TestProperty.java |  54 +-
 .../apache/jena/ontology/impl/TestResource.java |  50 +-
 .../model/test/AbstractContainerMethods.java    |   2 +-
 .../rdf/model/test/AbstractModelTestBase.java   |   4 +-
 .../jena/rdf/model/test/IsomorphicTests.java    |   4 +-
 .../jena/rdf/model/test/TestAddAndContains.java |   4 +-
 .../jena/rdf/model/test/TestAddModel.java       |   4 +-
 .../jena/rdf/model/test/TestConcurrency.java    |   4 +-
 .../rdf/model/test/TestConcurrencyNesting.java  |   2 +-
 .../rdf/model/test/TestConcurrencyParallel.java |   2 +-
 .../rdf/model/test/TestCopyInOutOfModel.java    |   2 +-
 .../org/apache/jena/rdf/model/test/TestGet.java |   4 +-
 .../jena/rdf/model/test/TestIterators.java      |   2 +-
 .../jena/rdf/model/test/TestListStatements.java |   4 +-
 .../jena/rdf/model/test/TestListSubjects.java   |   2 +-
 .../rdf/model/test/TestLiteralsInModel.java     |   2 +-
 .../jena/rdf/model/test/TestModelEvents.java    |   2 +-
 .../rdf/model/test/TestModelSetOperations.java  |   4 +-
 .../rdf/model/test/TestObjectOfProperties.java  |   2 +-
 .../apache/jena/rdf/model/test/TestObjects.java |   4 +-
 .../rdf/model/test/TestReifiedStatements.java   |   2 +-
 .../rdf/model/test/TestResourceMethods.java     |   2 +-
 .../jena/rdf/model/test/TestSelectorUse.java    |   2 +-
 .../jena/rdf/model/test/TestSeqMethods.java     |   2 +-
 .../model/test/TestSimpleListStatements.java    |   2 +-
 .../jena/rdf/model/test/TestSimpleSelector.java |   2 +-
 .../rdf/model/test/TestStatementCreation.java   |   4 +-
 .../rdf/model/test/TestStatementMethods.java    |   2 +-
 .../jena/rdfxml/xmloutput/PrettyWriterTest.java |   2 +-
 .../jena/rdfxml/xmloutput/TestXMLFeatures.java  |   4 +-
 .../jena/propertytable/graph/GraphCSVTest.java  |   4 +-
 .../arq/querybuilder/AbstractQueryBuilder.java  |  28 +-
 .../jena/arq/querybuilder/AskBuilder.java       |  12 +
 .../jena/arq/querybuilder/ConstructBuilder.java |  31 +-
 .../jena/arq/querybuilder/SelectBuilder.java    |  31 +-
 .../arq/querybuilder/clauses/SelectClause.java  |  33 +
 .../arq/querybuilder/clauses/WhereClause.java   |  20 +
 .../querybuilder/handlers/DatasetHandler.java   |  41 +-
 .../jena/arq/querybuilder/handlers/Handler.java |   1 +
 .../querybuilder/handlers/SelectHandler.java    |  48 +-
 .../handlers/SolutionModifierHandler.java       |  12 +-
 .../arq/querybuilder/handlers/WhereHandler.java |  37 +-
 .../jena/arq/AbstractRegexpBasedTest.java       |   1 +
 .../querybuilder/clauses/SelectClauseTest.java  |  29 +
 .../querybuilder/clauses/WhereClauseTest.java   | 100 ++-
 .../handlers/SelectHandlerTest.java             |  25 +
 .../querybuilder/handlers/WhereHandlerTest.java |  22 +
 .../org/apache/jena/iri/TestIRIFactory.java     |   2 +-
 .../java/org/apache/jena/sdb/SDBFactory.java    |   2 +-
 .../apache/jena/sdb/modify/UpdateEngineSDB.java |   6 +-
 .../apache/jena/sdb/store/DatasetGraphSDB.java  |  17 -
 .../jena/sdb/test/modify/TestSPARQLUpdate.java  |   7 +-
 .../sdb/test/modify/TestSPARQLUpdateMgt.java    |   8 +-
 .../apache/jena/tdb/modify/UpdateEngineTDB.java |  12 +-
 .../apache/jena/tdb/store/DatasetGraphTDB.java  |  14 -
 .../transaction/DatasetGraphTransaction.java    | 190 ++---
 .../main/java/tdb/cmdline/ModTDBGraphStore.java |  51 --
 jena-tdb/src/main/java/tdb/tdbupdate.java       |  15 +-
 .../jena/tdb/store/TestDynamicDatasetTDB.java   |   3 +
 .../apache/jena/tdb/store/Test_SPARQL_TDB.java  |   3 +-
 159 files changed, 1725 insertions(+), 1959 deletions(-)
----------------------------------------------------------------------



[6/6] jena git commit: Add back constructor to EntityDefinition for compatibility.

Posted by an...@apache.org.
Add back constructor to EntityDefinition for compatibility.

Fix un-generics.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/66a1eda8
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/66a1eda8
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/66a1eda8

Branch: refs/heads/master
Commit: 66a1eda82eeee2d8f551fac06d6b0a2672decdc2
Parents: 086b05c
Author: Andy Seaborne <an...@apache.org>
Authored: Mon May 25 13:10:34 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Mon May 25 13:10:34 2015 +0100

----------------------------------------------------------------------
 .../jena/query/text/EntityDefinition.java       | 70 +++++++++++++++++---
 .../org/apache/jena/query/text/TextQueryPF.java |  4 +-
 .../apache/jena/query/text/analyzer/Util.java   |  4 +-
 ...ractTestDatasetWithLuceneGraphTextIndex.java |  2 +-
 .../jena/query/text/TestBuildTextDataset.java   |  2 +-
 .../text/TestLuceneWithMultipleThreads.java     |  2 +-
 6 files changed, 68 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
index 30b048a..2a68247 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
@@ -33,17 +33,16 @@ import org.apache.lucene.analysis.Analyzer ;
  * Definition of a "document"
  */
 public class EntityDefinition {
-    private final Map<Node, String>      predicateToField = new HashMap<>() ;
-    private final Map<String, Analyzer>    fieldToAnalyzer  = new HashMap<>();
+    private final Map<Node, String>          predicateToField = new HashMap<>() ;
+    private final Map<String, Analyzer>      fieldToAnalyzer  = new HashMap<>() ;
     private final ListMultimap<String, Node> fieldToPredicate = ArrayListMultimap.create() ;
-    private final Collection<String>     fields           = Collections.unmodifiableCollection(fieldToPredicate.keys()) ;
+    private final Collection<String>         fields           = Collections.unmodifiableCollection(fieldToPredicate.keys()) ;
     // private final Collection<String> fields =
     // Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
-    private final String                 entityField ;
-    private final String                 primaryField ;
-    private String                 graphField ;
-    private String                 langField ;
-    //private final Node                   primaryPredicate ;
+    private final String                     entityField ;
+    private final String                     primaryField ;
+    private String                           graphField = null ;
+    private String                           langField ;
 
     /**
      * @param entityField
@@ -56,6 +55,61 @@ public class EntityDefinition {
         this.primaryField = primaryField ;
     }
 
+    /**
+     * @param entityField
+     *            The entity being indexed (e.g. its URI).
+     * @param primaryField
+     *            The primary/default field to search
+     * @param graphField
+     *            The field that stores graph URI, or null
+     */
+    public EntityDefinition(String entityField, String primaryField, String graphField) {
+        this(entityField, primaryField) ;
+        setGraphField(graphField);
+    }
+
+    /**
+     * @param entityField
+     *            The entity being indexed (e.g. its URI).
+     * @param primaryField
+     *            The primary/default field to search
+     * @param primaryPredicate
+     *            The property associated with the primary/default field
+     */
+    public EntityDefinition(String entityField, String primaryField, Resource primaryPredicate) {
+        this(entityField, primaryField) ;
+        setPrimaryPredicate(primaryPredicate);
+    }
+
+    /**
+     * @param entityField
+     *            The entity being indexed (e.g. its URI).
+     * @param primaryField
+     *            The primary/default field to search
+     * @param primaryPredicate
+     *            The property associated with the primary/default field
+     */
+    public EntityDefinition(String entityField, String primaryField, Node primaryPredicate) {
+        this(entityField, primaryField) ;
+        setPrimaryPredicate(primaryPredicate);
+    }
+
+    /**
+     * @param entityField
+     *            The entity being indexed (e.g. its URI).
+     * @param primaryField
+     *            The primary/default field to search
+     * @param graphField
+     *            The field that stores graph URI, or null
+     * @param primaryPredicate
+     *            The property associated with the primary/default field
+     */
+    public EntityDefinition(String entityField, String primaryField, String graphField, Node primaryPredicate) {
+        this(entityField, primaryField) ;
+        setGraphField(graphField);
+        setPrimaryPredicate(primaryPredicate) ;
+    }
+    
     public String getEntityField() {
         return entityField ;
     }

http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index d568232..81dc412 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -18,7 +18,6 @@
 
 package org.apache.jena.query.text ;
 
-import java.util.Iterator;
 import java.util.List ;
 
 import org.apache.jena.atlas.iterator.Iter ;
@@ -110,8 +109,7 @@ public class TextQueryPF extends PropertyFunctionBase {
     private String extractArg(String prefix, PropFuncArg argObject) {
         String value = null;
         int pos = 0;
-        for (Iterator it = argObject.getArgList().iterator(); it.hasNext(); ) {
-            Node node = (Node)it.next();
+        for (Node node : argObject.getArgList()) {
             if (node.isLiteral()) {
                 String arg = node.getLiteral().toString();
                 if (arg.startsWith(prefix + ":")) {

http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
index 11dd683..c8e3490 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/analyzer/Util.java
@@ -26,7 +26,7 @@ import java.util.Hashtable;
 
 public class Util {
 
-    private static Hashtable<String, Class> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
+    private static Hashtable<String, Class<?>> analyzersClasses; //mapping between ISO2-letter language and lucene existing analyzersClasses
     private static Hashtable<String, Analyzer> cache = new Hashtable<>(); //to avoid unnecessary multi instantiation
 
     static {
@@ -48,7 +48,7 @@ public class Util {
             Class<?> className = analyzersClasses.get(lang);
             if (className == null)
                 return null;
-            Constructor constructor = className.getConstructor(Version.class);
+            Constructor<?> constructor = className.getConstructor(Version.class);
             Analyzer analyzer = (Analyzer)constructor.newInstance(ver);
             cache.put(lang, analyzer);
             return analyzer;

http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
index 1670f63..720e372 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneGraphTextIndex.java
@@ -38,7 +38,7 @@ public class AbstractTestDatasetWithLuceneGraphTextIndex extends AbstractTestDat
         Directory dir = new RAMDirectory() ;
         EntityDefinition eDef = new EntityDefinition("iri", "text");
         eDef.setGraphField("graph");
-        eDef.setPrimaryPredicate(RDFS.label.asNode());
+        eDef.setPrimaryPredicate(RDFS.label);
         eDef.set("comment", RDFS.comment.asNode()) ; // some tests require indexing rdfs:comment
         TextIndex tidx = new TextIndexLucene(dir, new TextIndexConfig(eDef)) ;
         dataset = TextDatasetFactory.create(ds1, tidx) ;

http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
index 02d02f9..2335e40 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestBuildTextDataset.java
@@ -111,7 +111,7 @@ public class TestBuildTextDataset extends BaseTest
 
         // Define the index mapping
         EntityDefinition entDef = new EntityDefinition("uri", "text");
-        entDef.setPrimaryPredicate(RDFS.label.asNode());
+        entDef.setPrimaryPredicate(RDFS.label);
 
         // Lucene, in memory.
         Directory dir = new RAMDirectory() ;

http://git-wip-us.apache.org/repos/asf/jena/blob/66a1eda8/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
index 6e743a2..5bbe7c3 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestLuceneWithMultipleThreads.java
@@ -50,7 +50,7 @@ public class TestLuceneWithMultipleThreads
     static {
         entDef = new EntityDefinition("uri", "label");
         entDef.setGraphField("graph");
-        entDef.setPrimaryPredicate(RDFS.label.asNode());
+        entDef.setPrimaryPredicate(RDFS.label);
         StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
         entDef.setAnalyzer("label", analyzer);
     }


[5/6] jena git commit: Merge commit 'refs/pull/64/head' of github.com:apache/jena

Posted by an...@apache.org.
Merge commit 'refs/pull/64/head' of github.com:apache/jena

This closes #64.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/086b05c8
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/086b05c8
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/086b05c8

Branch: refs/heads/master
Commit: 086b05c8450ee08005ba18861502280fea17c443
Parents: dc19466 ed71702
Author: Andy Seaborne <an...@apache.org>
Authored: Mon May 25 12:55:13 2015 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Mon May 25 12:55:13 2015 +0100

----------------------------------------------------------------------
 .../main/java/examples/JenaTextExample1.java    |   6 +-
 .../java/org/apache/jena/query/text/Entity.java |   9 +-
 .../jena/query/text/EntityDefinition.java       |  70 ++----
 .../jena/query/text/TextDatasetFactory.java     |  50 ++--
 .../apache/jena/query/text/TextIndexConfig.java |  61 +++++
 .../apache/jena/query/text/TextIndexLucene.java |  68 ++++--
 .../query/text/TextIndexLuceneMultilingual.java |  75 ++++++
 .../apache/jena/query/text/TextQueryFuncs.java  |   9 +-
 .../org/apache/jena/query/text/TextQueryPF.java |  41 +++-
 .../apache/jena/query/text/analyzer/Util.java   |  96 ++++++++
 .../assembler/EntityDefinitionAssembler.java    |   8 +-
 .../assembler/LocalizedAnalyzerAssembler.java   |  60 +++++
 .../query/text/assembler/TextAssembler.java     |   1 +
 .../assembler/TextIndexLuceneAssembler.java     |  40 +++-
 .../jena/query/text/assembler/TextVocab.java    |   5 +
 ...ractTestDatasetWithLuceneGraphTextIndex.java |   6 +-
 .../org/apache/jena/query/text/TS_Text.java     |   3 +
 .../jena/query/text/TestBuildTextDataset.java   |   5 +-
 .../text/TestDatasetWithLocalizedAnalyzer.java  | 147 ++++++++++++
 ...tDatasetWithLuceneMultilingualTextIndex.java | 238 +++++++++++++++++++
 ...DatasetWithLuceneTextIndexWithLangField.java | 126 ++++++++++
 .../text/TestLuceneWithMultipleThreads.java     |  10 +-
 .../org/apache/jena/query/text/TestTextTDB.java |   5 +-
 jena-text/testing/TextQuery/data.skos           |  36 +++
 24 files changed, 1047 insertions(+), 128 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/086b05c8/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityDefinitionAssembler.java
----------------------------------------------------------------------